Repository: d1vanov/Simple-FFT
Branch: master
Commit: a0cc843ff36d
Files: 32
Total size: 151.5 KB

Directory structure:
gitextract_8dx02k3e/

├── CMakeLists.txt
├── LICENSE.md
├── README.md
├── benchmark-tests/
│   ├── benchmark_tests_fftw3.cpp
│   ├── benchmark_tests_fftw3.h
│   ├── benchmark_tests_main.cpp
│   └── results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/
│       ├── benchmark_test_multiple_transforms_GCC_release.txt
│       ├── benchmark_test_multiple_transforms_ICC_release.txt
│       ├── benchmark_test_multiple_transforms_clang_release.txt
│       ├── benchmark_test_single_transform_GCC_release.txt
│       ├── benchmark_test_single_transform_ICC_release.txt
│       └── benchmark_test_single_transform_clang_release.txt
├── include/
│   └── simple_fft/
│       ├── check_fft.hpp
│       ├── copy_array.hpp
│       ├── error_handling.hpp
│       ├── fft.h
│       ├── fft.hpp
│       ├── fft_impl.hpp
│       └── fft_settings.h
└── unit-tests/
    ├── test_fft.cpp
    ├── test_fft.h
    ├── test_fft.hpp
    ├── test_with_armadillo_matrix_and_row.cpp
    ├── test_with_blitz.cpp
    ├── test_with_boost_multiarray.cpp
    ├── test_with_boost_ublas_vector_matrix.cpp
    ├── test_with_eigen_vector_matrix.cpp
    ├── test_with_marray.cpp
    ├── test_with_native_cpp_pointer_based_arrays.cpp
    ├── test_with_std_vectors.cpp
    ├── test_with_stlsoft.cpp
    └── unit_tests_main.cpp

================================================
FILE CONTENTS
================================================

================================================
FILE: CMakeLists.txt
================================================
# Oldest CMake release this script claims to support. The call also selects the
# policy defaults used by every command that follows.
# NOTE(review): recent CMake releases deprecate (>= 3.27) and finally reject
# (>= 4.0) a minimum below 3.5. Raising it switches policies such as CMP0054 to
# NEW, which changes how if() comparisons against compiler IDs are evaluated,
# so re-check those conditions before bumping this value.
cmake_minimum_required(VERSION 2.6)
# Enables CTest support for this directory.
# NOTE(review): no add_test() call is visible in this script, so it looks like
# no test is actually registered with CTest - confirm.
enable_testing()
# Declares the project; no LANGUAGES are listed, so CMake's defaults apply.
project(simple_fft)

# Names of the two executables produced by this build.
set(UNIT_TESTS      simple_fft_unit_tests)
set(BENCHMARK_TESTS simple_fft_benchmark_tests)

# Headers of the header-only library itself; shared by both executables.
set(HEADERS
    include/simple_fft/fft.h
    include/simple_fft/fft.hpp
    include/simple_fft/fft_impl.hpp
    include/simple_fft/fft_settings.h
    include/simple_fft/check_fft.hpp
    include/simple_fft/copy_array.hpp
    include/simple_fft/error_handling.hpp
)

# Unit tests: library headers plus the test helpers.
set(HEADERS_UNIT_TESTS ${HEADERS})
list(APPEND HEADERS_UNIT_TESTS
    unit-tests/test_fft.h
    unit-tests/test_fft.hpp
)

# Sources that are always part of the unit test executable; the optional
# third-party sections further down append to this list.
set(SOURCES_UNIT_TESTS)
list(APPEND SOURCES_UNIT_TESTS
    unit-tests/unit_tests_main.cpp
    unit-tests/test_fft.cpp
    unit-tests/test_with_std_vectors.cpp
    unit-tests/test_with_native_cpp_pointer_based_arrays.cpp
)

# Benchmark: library headers plus the FFTW benchmark header.
# unit-tests/test_fft.cpp is a source file, not a header: because this list is
# handed to add_executable() it gets compiled into the benchmark as well.
set(HEADERS_BENCHMARK_TESTS ${HEADERS})
list(APPEND HEADERS_BENCHMARK_TESTS
    benchmark-tests/benchmark_tests_fftw3.h
    unit-tests/test_fft.cpp
)

set(SOURCES_BENCHMARK_TESTS)
list(APPEND SOURCES_BENCHMARK_TESTS
    benchmark-tests/benchmark_tests_fftw3.cpp
    benchmark-tests/benchmark_tests_main.cpp
)
   
# Boost section
# Looks for Boost and, when it is available, enables the unit tests for
# boost::multi_array and boost::numeric::ublas independently of each other.
# Defines HAS_BOOST_PACKAGE, HAS_BOOST_MULTI_ARRAY and HAS_BOOST_UBLAS.
find_package(Boost QUIET)
if(Boost_FOUND)
    message(STATUS "Boost package was found")
    add_definitions("-DHAS_BOOST_PACKAGE")
    # Pass the directory list as-is. Wrapping it in one quoted string together
    # with an (undefined) ${SYSTEM} variable appended a blank to the last path,
    # so a Boost installed outside the default search path was never usable.
    include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
    message(STATUS "Checking for multi_array and ublas availability...")

    # Probe the Boost include directories first so that a Boost found in a
    # non-standard prefix is also recognised here.
    find_file(BOOST_MULTI_ARRAY_FILE boost/multi_array.hpp
              HINTS ${Boost_INCLUDE_DIRS})
    if(BOOST_MULTI_ARRAY_FILE)
        message(STATUS "boost::multi_array headers were found, building a test")
        add_definitions("-DHAS_BOOST_MULTI_ARRAY")
        list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_boost_multiarray.cpp)
    else()
        message(STATUS "boost::multi_array headers were not found, not building a test")
    endif()

    find_file(BOOST_UBLAS_FILE boost/numeric/ublas/matrix.hpp
              HINTS ${Boost_INCLUDE_DIRS})
    if(BOOST_UBLAS_FILE)
        message(STATUS "boost::numeric::ublas headers were found, building a test")
        add_definitions("-DHAS_BOOST_UBLAS")
        list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_boost_ublas_vector_matrix.cpp)
    else()
        message(STATUS "boost::numeric::ublas headers were not found, not building a test")
    endif()
else()
    message(STATUS "Boost package was not found, not building tests with either ")
    message(STATUS "boost::multi_array or boost::numeric::ublas")
endif()

# marray
# Enables the marray unit test (and defines HAS_MARRAY) when marray/marray.hxx
# can be located.
find_path(MARRAY_DIR marray/marray.hxx)
if(MARRAY_DIR)
    # Only the directory itself is passed: combining it with an undefined
    # ${SYSTEM} variable inside one quoted string produced a path ending in a
    # blank, which the compiler cannot resolve.
    include_directories(SYSTEM "${MARRAY_DIR}")
    message(STATUS "marray header was found, building a test")
    add_definitions("-DHAS_MARRAY")
    list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_marray.cpp)
else()
    message(STATUS "marray headers was not found, not building a test")
endif()

# Eigen
# Enables the Eigen unit test (and defines HAS_EIGEN) when the umbrella header
# eigen3/Eigen/Eigen can be located. EIGEN_DIR is the directory that contains
# the eigen3/ folder, not the Eigen folder itself.
find_file(EIGEN_FILE eigen3/Eigen/Eigen)
if(EIGEN_FILE)
    find_path(EIGEN_DIR eigen3/Eigen/Eigen)
    if(EIGEN_DIR)
        # Only the directory itself is passed: combining it with an undefined
        # ${SYSTEM} variable inside one quoted string produced a path ending
        # in a blank, which the compiler cannot resolve.
        include_directories(SYSTEM "${EIGEN_DIR}")
        message(STATUS "Eigen headers were found, building a test")
        add_definitions("-DHAS_EIGEN")
        list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_eigen_vector_matrix.cpp)
    else()
        message(STATUS "Eigen headers were not found, not building a test")
    endif()
else()
    message(STATUS "Eigen headers were not found, not building a test")
endif()

# Armadillo
# Enables the Armadillo unit test (and defines HAS_ARMADILLO) when both the
# "armadillo" include file and the Armadillo library can be located.
# NOTE(review): ARMADILLO_LIB only serves as an availability probe here; no
# target_link_libraries() call using it is visible in this script - confirm
# whether the test needs the library to be linked.
find_path(ARMADILLO_INCLUDEFILE_DIR armadillo)
if(ARMADILLO_INCLUDEFILE_DIR)
    # Only the directory itself is passed: combining it with an undefined
    # ${SYSTEM} variable inside one quoted string produced a path ending in a
    # blank, which the compiler cannot resolve.
    include_directories(SYSTEM "${ARMADILLO_INCLUDEFILE_DIR}")
    message(STATUS "Armadillo include file was found, looking for library...")

    # find_library() has no OPTIONAL keyword (the word was searched for as a
    # library name); a missing library is already reported through the
    # variable being set to NOTFOUND. The explicit file names are tried first,
    # the plain name "armadillo" lets CMake apply the platform's own
    # prefix/suffix on systems the explicit names do not cover.
    find_library(ARMADILLO_LIB
                 NAMES
                 libarmadillo.so armadillo.lib libarmadillo.dll armadillo
                 )
    if(ARMADILLO_LIB)
        add_definitions("-DHAS_ARMADILLO")
        list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_armadillo_matrix_and_row.cpp)
        message(STATUS "Armadillo library was found, building a test")
    else()
        message(STATUS "Armadillo library was not found, not building a test")
    endif()
else()
    message(STATUS "Armadillo include file was not found, not building a test")
endif()

# Blitz++
# Enables the Blitz++ unit test (and defines HAS_BLITZ) when blitz/blitz.h can
# be located.
find_file(BLITZ_FILE blitz/blitz.h)
if(BLITZ_FILE)
    find_path(BLITZ_DIR blitz/blitz.h)
    if(BLITZ_DIR)
        # Only the directory itself is passed: combining it with an undefined
        # ${SYSTEM} variable inside one quoted string produced a path ending
        # in a blank, which the compiler cannot resolve.
        include_directories(SYSTEM "${BLITZ_DIR}")
        message(STATUS "Blitz++ headers were found, building a test")
        add_definitions("-DHAS_BLITZ")
        list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_blitz.cpp)
    else()
        message(STATUS "Blitz++ headers were not found, not building a test")
    endif()
else()
    message(STATUS "Blitz++ headers were not found, not building a test")
endif()

# STLSoft
# Enables the STLSoft unit test (and defines HAS_STLSOFT) when
# stlsoft/containers/fixed_array.hpp can be located.
find_file(STLSOFT_FILE stlsoft/containers/fixed_array.hpp)
if(STLSOFT_FILE)
    find_path(STLSOFT_DIR stlsoft/containers/fixed_array.hpp)
    if(STLSOFT_DIR)
        # Only the directory itself is passed: combining it with an undefined
        # ${SYSTEM} variable inside one quoted string produced a path ending
        # in a blank, which the compiler cannot resolve.
        include_directories(SYSTEM "${STLSOFT_DIR}")
        message(STATUS "STLSoft headers were found, building a test")
        add_definitions("-DHAS_STLSOFT")
        list(APPEND SOURCES_UNIT_TESTS unit-tests/test_with_stlsoft.cpp)
    else()
        message(STATUS "STLSoft headers were not found, not building a test")
    endif()
else()
    message(STATUS "STLSoft headers were not found, not building a test")
endif()

# fftw library for benchmark test
# Locates the FFTW3 header directory and library used by the benchmark.
# Results: FFTW_INCLUDEFILE_DIR, FFTW_LIB, FFTW_LIB_OPENMP (optional OpenMP
# flavour), and the definitions HAS_FFTW3 / _FFTW_INCLUDEFILE_NO_H.
find_path(FFTW_INCLUDEFILE_DIR NAMES fftw3.h fftw)
if(FFTW_INCLUDEFILE_DIR)
    # Some installations apparently ship the header without the ".h" suffix;
    # the sources are told about it through _FFTW_INCLUDEFILE_NO_H.
    find_file(FFTW_INCLUDEFILE_NO_H fftw)
    if(FFTW_INCLUDEFILE_NO_H)
        add_definitions("-D_FFTW_INCLUDEFILE_NO_H")
    endif()
    # Only the directory itself is passed: combining it with an undefined
    # ${SYSTEM} variable inside one quoted string produced a path ending in a
    # blank, which the compiler cannot resolve.
    include_directories(SYSTEM "${FFTW_INCLUDEFILE_DIR}")
    message(STATUS "fftw include file was found, looking for library...")

    # find_library() has no OPTIONAL keyword (the word was searched for as a
    # library name); a missing library simply leaves the variable NOTFOUND.
    # The explicit file names are tried first, the plain names let CMake apply
    # the platform's own prefix/suffix where the explicit ones do not exist.
    find_library(FFTW_LIB_OPENMP
                 NAMES
                 libfftw3_omp.so fftw3_omp
                 )

    find_library(FFTW_LIB
                 NAMES
                 libfftw3.so libfftw-3.3.lib libfftw3-3.dll fftw3
                 )
    if(FFTW_LIB)
        add_definitions("-DHAS_FFTW3")
        message(STATUS "FFTW library was found, will build a benchmark test")
        if(FFTW_LIB_OPENMP)
            message(STATUS "OpenMP version of fftw3 was found and will be used")
        endif()
    else()
        message(STATUS "FFTW library was not found, won't build a benchmark test")
    endif()
else()
    message(STATUS "FFTW include file was not found, won't build a benchmark test")
endif()

# ---------------------------------------------------------------------------
# Targets
# ---------------------------------------------------------------------------
add_executable(${UNIT_TESTS} ${HEADERS_UNIT_TESTS} ${SOURCES_UNIT_TESTS})

# benchmark-tests/benchmark_tests_fftw3.cpp includes <fftw3.h> unconditionally,
# so the benchmark can only be compiled and linked when both the FFTW header
# directory and the FFTW library were found. Creating the target regardless
# (as announced by the "won't build a benchmark test" messages) would break
# the default build on machines without FFTW.
if(FFTW_INCLUDEFILE_DIR AND FFTW_LIB)
    set(SIMPLE_FFT_HAS_BENCHMARK_TARGET TRUE)
    add_executable(${BENCHMARK_TESTS} ${HEADERS_BENCHMARK_TESTS} ${SOURCES_BENCHMARK_TESTS})
    target_link_libraries(${BENCHMARK_TESTS} ${FFTW_LIB})
    if(FFTW_LIB_OPENMP)
        target_link_libraries(${BENCHMARK_TESTS} ${FFTW_LIB_OPENMP})
    endif()
else()
    set(SIMPLE_FFT_HAS_BENCHMARK_TARGET FALSE)
endif()

# ---------------------------------------------------------------------------
# Compiler specific flags
# ---------------------------------------------------------------------------
# All variable expansions inside if() are quoted: an empty CMAKE_BUILD_TYPE
# (the default for a plain "cmake <dir>") would otherwise expand to nothing
# and turn the condition into a malformed expression.
# Each set() below prepends to CMAKE_CXX_FLAGS, i.e. later lines end up
# earlier on the command line; the order of the statements is kept on purpose.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    # OpenMP is not set up for Clang in this script.
    if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
        set(CMAKE_CXX_FLAGS "-Wno-unknown-pragmas -Wno-unused-parameter ${CMAKE_CXX_FLAGS}")
        set(CMAKE_CXX_FLAGS "-Werror -Wextra -pedantic -pedantic-errors ${CMAKE_CXX_FLAGS}")
    else() # "Release"
        set(CMAKE_CXX_FLAGS "-mcmodel=medium -fstrict-aliasing -Wstrict-aliasing ${CMAKE_CXX_FLAGS}")
        set(CMAKE_CXX_FLAGS "-O3 -fkeep-inline-functions -fno-stack-protector ${CMAKE_CXX_FLAGS}")
    endif()
else() # not clang
    find_package(OpenMP QUIET)
    if(OPENMP_FOUND)
        message(STATUS "OpenMP found.")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
        # The sources guard their OpenMP pragmas with this macro.
        add_definitions(-D__USE_OPENMP)
    else()
        message(STATUS "OpenMP not found, parallelization for FFT and tests will be disabled")
    endif() # OpenMP

    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
        # The OpenMP switches come from FindOpenMP above when OpenMP is
        # available; forcing "-fopenmp -lgomp" here as well contradicted the
        # "OpenMP not found" path and put a linker input into compile flags.
        set(CMAKE_CXX_FLAGS "-Wno-unused-parameter -fstrict-aliasing ${CMAKE_CXX_FLAGS}")
        if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
            set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wshadow -Werror -ansi -pedantic-errors ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Wno-long-long -Wuninitialized -Wstrict-aliasing ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Winit-self -Wno-missing-declarations -Woverloaded-virtual ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Weffc++ -Wcast-align -Wcast-qual -Wpointer-arith ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Wformat=2 -Wnon-virtual-dtor ${CMAKE_CXX_FLAGS}")
        else() # "Release"
            set(CMAKE_CXX_FLAGS "-O3 -march=native -mtune=native -ffast-math ${CMAKE_CXX_FLAGS}")
        endif()
    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
        if(OPENMP_FOUND)
            set(CMAKE_CXX_FLAGS "-openmp -parallel ${CMAKE_CXX_FLAGS}")
            # The Intel OpenMP runtime is mandatory here. The check is spelled
            # out because REQUIRED is only a find_library() keyword in newer
            # CMake releases; older ones would search for a library of that name.
            find_library(omp5_lib
                         NAMES
                         libiomp5.so
                         )
            if(NOT omp5_lib)
                message(FATAL_ERROR "Intel OpenMP runtime (libiomp5.so) was not found.")
            endif()
            set(LIBS ${LIBS} ${omp5_lib})
            target_link_libraries(${UNIT_TESTS} ${omp5_lib})
            if(SIMPLE_FFT_HAS_BENCHMARK_TARGET)
                target_link_libraries(${BENCHMARK_TESTS} ${omp5_lib})
            endif()
        endif() # OpenMP
        if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
            set(CMAKE_CXX_FLAGS "-debug -Wall -Werror -Winline -Wdeprecated -Wno-missing-prototypes ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Wcomment -Wdeprecated -Wformat-security -Wmain -Wno-missing-declarations ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Woverflow -Wpointer-arith -Woverloaded-virtual -Wpointer-arith ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Wreturn-type -Wstrict-prototypes -Wtrigraphs -Wuninitialized ${CMAKE_CXX_FLAGS}")
            set(CMAKE_CXX_FLAGS "-Wunknown-pragmas -Wno-unused-function -Wno-unused-variable ${CMAKE_CXX_FLAGS}")
        else() # "Release"
            set(CMAKE_CXX_FLAGS "-O3 -march=core-avx-i ${CMAKE_CXX_FLAGS}")
        endif()
    elseif(MSVC)
        # The MSVC variable is set by CMake for every Visual C++ version, so
        # it replaces the string comparisons against "MSVC", "MSVC10", ...
        # whose outcome depended on how if() dereferences its arguments.
        set(CMAKE_CXX_FLAGS "/D_SCL_SECURE_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS ${CMAKE_CXX_FLAGS}")
        set(CMAKE_CXX_FLAGS "/wd4100 /wd4503 ${CMAKE_CXX_FLAGS}")
        if(OPENMP_FOUND)
            set(CMAKE_CXX_FLAGS "/openmp ${CMAKE_CXX_FLAGS}")
        endif()
    else()
        message(FATAL_ERROR "Unsupported tool chain.")
    endif()
endif()


================================================
FILE: LICENSE.md
================================================
Copyright (c) 2013-2020 Dmitry Ivanov

The MIT License (MIT)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
Simple-FFT
==========

**Header-only C++ library implementing fast Fourier transform of 1D, 2D and 3D data.**

### What's this

Simple FFT is a C++ library implementing fast Fourier transform. The implemented FFT is a radix-2 Cooley-Tukey algorithm. This algorithm can't handle transforms of data whose size is not a power of 2. It is not the most optimal known FFT algorithm.

The library is header-only: you don't need to build anything, just include the files in your project. The user-level interface is simple and resembles the one typically found in mathematical software.

The library is distributed under MIT license

#### Why one more FFT library? Does it have any benefits compared to existing libraries?

Why do humans create things even though they already have something similar? Because they are not satisfied with what they have. So was my impression with existing libraries implementing fast Fourier transforms. My desires were:
 * Free & open-source library
 * License allowing to use the library in both open and closed-source software.
 * Convenient API somewhat similar to the one found in typical mathematical software.
 * Limited size and dependencies, ideally no dependencies (handy for multiple supported platforms and compilers).
 * As long as my arrays are not going to be huge, I don't need the fastest FFT in the galaxy, I'd be quite happy with some algorithm getting the job done.
 * Having said that my arrays are not too large, I still want minimal or no overhead for copying the data to objects of types used in the library.

The last statement deserves some explanation: many popular libraries performing FFT use their own data types, sometimes even their own containers. If you already have the data of your own type/container, you are going to either manually transform (e.g. copy) the data or try to cast pointers. Both ways are not elegant.

I didn't find a solution satisfying my whole wishlist, so I decided to create my own simple library. I've already mentioned some of the disadvantages of this approach:
 * Not the fastest algorithm known nowadays.
 * Can't handle data whose size is not a power of 2.

The advantages of Simple FFT over some well-known and widely used libraries are:
 * Tiny size - just some header files.
 * No need to build it and link a library to your project.
 * Can handle 1D, 2D and 3D arrays, extendable for larger dimensions.
 * Designed to support any _reasonable_ multidimensional array/container type one can imagine.

Again, the last statement needs some details to be revealed: C++ natively supports multidimensional arrays via pointers and also via "std vector of std vectors" approach. However, there are a lot of libraries with their own implementations of multidimensional arrays. I wanted to create a library which in theory can use _any_ type of multidimensional array without data copying or pointer casting. It is not possible to guarantee that my library will work with every multidimensional array type you can imagine but there is only a limited number of restrictions for used types.

#### How the API is convenient? How to actually use the library?

By convenience of API I mean an interface somewhat similar to that found in mathematical software like Mathcad, Matlab, Octave, Scilab etc. The simplest API you can think of is something like `A = FFT(B)`. It is not very easy to implement efficiently in C++ (well, without the move semantics of C++11 at least) because with such an interface you are going to return the result of transforming B by value, which means data copying, i.e. overhead (unless return value optimization is employed). So in C++ it is better to return the result by reference. The function can also return a boolean which indicates whether the transform was successful or not. So the simplest interface would look like

```c++
 b = FFT(A,B); // FFT from A to B
```

But FFT algorithm requires the knowledge of shape and dimensionality of used arrays. Most multidimensional array implementations can provide this information but they do it in different ways and I wanted something very generic. So I decided to create functions with the same name but different signature depending on the number of dimensions:

```c++
 b = FFT(A,B,n); // FFT from A to B where A and B are 1D arrays with n elements
 b = FFT(A,B,n,m); // FFT from A to B where A and B are matrices (2D arrays) with n rows and m columns
 b = FFT(A,B,n,m,l); // FFT from A to B where A and B are 3D arrays with n rows, m columns and l depth number;
```

One more thing: if the returned value is false then some error has happened. In order to spare the user from debugging into a 3rd-party library to figure out what happened, I decided to return the error description as a C-style string (because some people don't use `std::string`). So the interface ended up looking like this:

```c++
 const char * error = NULL; // error description
 b = FFT(A,B,n,error); // FFT from A to B where A and B are 1D arrays with n elements
 b = FFT(A,B,n,m,error); // FFT from A to B where A and B are matrices (2D arrays) with n rows and m columns
 b = FFT(A,B,n,m,l,error); // FFT from A to B where A and B are 3D arrays with n rows, m columns and l depth number;
```

But how about the inverse transform? The flag can be used to tell the forward transform from inverse but I thought that different function names would be easier: FFT for forward transform and IFFT for inverse transform:

```c++
 const char * error = NULL; // error description
 b = FFT(A,B,n,error); // forward FFT from A to B where A and B are 1D arrays with n elements
 b = FFT(A,B,n,m,error); // forward FFT from A to B where A and B are matrices (2D arrays) with n rows and m columns
 b = FFT(A,B,n,m,l,error); // forward FFT from A to B where A and B are 3D arrays with n rows, m columns and l depth number;
 b = IFFT(B,A,n,error); // inverse FFT from B to A where A and B are 1D arrays with n elements
 b = IFFT(B,A,n,m,error); // inverse FFT from B to A where A and B are matrices (2D arrays) with n rows and m columns
 b = IFFT(B,A,n,m,l,error); // inverse FFT from B to A where A and B are 3D arrays with n rows, m columns and l depth number;
```

Beyond that there are only two settings:
* User needs to define two types called `real_type` and `complex_type`. These are needed by design to avoid extra problems with template instantiation
* User needs to define macro `__USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR` if the multidimensional array type you want to use accesses elements via `operator[]` (for example, if it is native C++ multidimensional array or `boost::multi_array` or something similar). Otherwise the library will attempt to use `operator()` for element access.

That's the whole explanation of API.

#### How does it work with multiple libraries implementing matrices and multidimensional arrays in C++ without being aware of those?

Well, the common generic technique of C++ was used - templates. I also added two `typedef`ed types (`real_type` and `complex_type`) to avoid overcomplicating the code. But it's not all about templates - I also tried to implement the most generic element access I could imagine. After a bit of thinking I realized that, in terms of interface, different libraries implementing multidimensional arrays commonly differ only by their element access operator - some implementations use `operator[]` and others use `operator()`. So I split every code section using the element access operator into two code blocks - one with `operator[]` and one with `operator()`. The switch between them is controlled by the macro `__USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR` - if it's defined, `operator[]` is used, if not - `operator()`. The user can define this macro in some translation units and leave it undefined (or `#undef` it) in others - just like I did with the unit tests.

#### Are there any examples or, even better, tests?
I wrote some tests illustrating how to use SimpleFFT library along with some well-known C++ libraries implementing multidimensional arrays. They can also serve as examples (probably, a bit overcomplicated). The tests are based on the following checks:
* after each forward or inverse FFT [Parseval's theorem](https://en.wikipedia.org/wiki/Parseval%27s_theorem) must be satisfied for input and output data
* after each sequence of FFT and IFFT the energy conservation law must be satisfied
* after each sequence of FFT and IFFT the result should be the very same as initial input; so I decided to measure the largest discrepancy between the result and input and calculate the relative error there. If it is small enough (less than 0.01%), this test is considered passed.

I also implemented a simple benchmark test comparing the execution time of multiple loops of Simple FFT and fftw3. The results for Linux with three compilers can be found in the "benchmark-tests" folder. As expected, fftw3 is much faster.

#### There are multiple files here, which should I use?
There are only two files of interest for library user:
* `/include/simple_fft/fft_settings.h`
* `/include/simple_fft/fft.h`

The first one is supposed to contain `typedef`s for `real_type` and `complex_type` and, if needed, the define of `__USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR` macro. This stuff can also be done somewhere else. The second file is what's actually needed to calculate FFT and IFFT: it contains only API declarations and includes some of other files.


================================================
FILE: benchmark-tests/benchmark_tests_fftw3.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#define __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "benchmark_tests_fftw3.h"
#include "../unit-tests/test_fft.hpp"
#include <vector>
#include <complex>
#include <ctime>
#include <iostream>
#include <iomanip>
#include <fftw3.h>

namespace simple_fft {
namespace fft_test {

bool BenchmarkTestAgainstFFTW3()
{
    bool res;
    const char * err_str = NULL;
    const int numFFTLoops1D = 10000;
    const int numFFTLoops2D = 500;
    const int numFFTLoops3D = 15;

    using namespace pulse_params;

    std::vector<real_type> t, x, y;
    makeGridsForPulse3D(t, x, y);

    // typedefing vectors
    typedef std::vector<real_type> RealArray1D;
    typedef std::vector<complex_type> ComplexArray1D;
    typedef std::vector<std::vector<real_type> > RealArray2D;
    typedef std::vector<std::vector<complex_type> > ComplexArray2D;
    typedef std::vector<std::vector<std::vector<real_type> > > RealArray3D;
    typedef std::vector<std::vector<std::vector<complex_type> > > ComplexArray3D;

    // 1D fields and spectrum
    RealArray1D E1_real(nt);
    ComplexArray1D E1_complex(nt), G1(nt);

    // 2D fields and spectrum
    RealArray2D E2_real(nt);
    ComplexArray2D E2_complex(nt), G2(nt);

    int grid_size_t = static_cast<int>(nt);

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
    for(int i = 0; i < grid_size_t; ++i) {
        E2_real[i].resize(nx);
        E2_complex[i].resize(nx);
        G2[i].resize(nx);
    }

    // 3D fields and spectrum
    RealArray3D E3_real(nt);
    ComplexArray3D E3_complex(nt), G3(nt);

    int grid_size_x = static_cast<int>(nx);

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
    for(int i = 0; i < grid_size_t; ++i) {
        E3_real[i].resize(nx);
        E3_complex[i].resize(nx);
        G3[i].resize(nx);
        for(int j = 0; j < grid_size_x; ++j) {
            E3_real[i][j].resize(ny);
            E3_complex[i][j].resize(ny);
            G3[i][j].resize(ny);
        }
    }

    CMakeInitialPulses3D<RealArray1D,RealArray2D,RealArray3D,true>::makeInitialPulses(E1_real, E2_real, E3_real);
    CMakeInitialPulses3D<ComplexArray1D,ComplexArray2D,ComplexArray3D,false>::makeInitialPulses(E1_complex, E2_complex, E3_complex);

    // Measure the execution time of Simple FFT
    // 1) 1D Simple FFT for real data
    clock_t beginTime = clock();
    for(int i = 0; i < numFFTLoops1D; ++i) {
        res = FFT(E1_real, G1, nt, err_str);
        if (!res) {
            std::cout << "Simple FFT 1D real failed: " << err_str << std::endl;
            return false;
        }
    }
    std::cout << "Simple 1D FFT for real data: execution time for "
              << numFFTLoops1D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    // 2) 1D Simple FFT for complex data
    beginTime = clock();
    for(int i = 0; i < numFFTLoops1D; ++i) {
        res = FFT(E1_complex, G1, nt, err_str);
        if (!res) {
            std::cout << "Simple FFT 1D complex failed: " << err_str << std::endl;
            return false;
        }
    }
    std::cout << "Simple 1D FFT for complex data: execution time for "
              << numFFTLoops1D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    // 3) 2D Simple FFT for real data
    beginTime = clock();
    for(int i = 0; i < numFFTLoops2D; ++i) {
        res = FFT(E2_real, G2, nt, nx, err_str);
        if (!res) {
            std::cout << "Simple FFT 2D real failed: " << err_str << std::endl;
            return false;
        }
    }
    std::cout << "Simple 2D FFT for real data: execution time for "
              << numFFTLoops2D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    // 4) 2D Simple FFT for complex data
    beginTime = clock();
    for(int i = 0; i < numFFTLoops2D; ++i) {
        res = FFT(E2_complex, G2, nt, nx, err_str);
        if (!res) {
            std::cout << "Simple FFT 2D complex failed: " << err_str << std::endl;
            return false;
        }
    }
    std::cout << "Simple 2D FFT for complex data: execution time for "
              << numFFTLoops2D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    // 5) 3D Simple FFT for real data
    beginTime = clock();
    for(int i = 0; i < numFFTLoops3D; ++i) {
        res = FFT(E3_real, G3, nt, nx, ny, err_str);
        if (!res) {
            std::cout << "Simple FFT 3D real failed: " << err_str << std::endl;
            return false;
        }
    }
    std::cout << "Simple 3D FFT for real data: execution time for "
              << numFFTLoops3D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    // 6) 3D Simple FFT for complex data
    beginTime = clock();
    for(int i = 0; i < numFFTLoops3D; ++i) {
        res = FFT(E3_complex, G3, nt, nx, ny, err_str);
        if (!res) {
            std::cout << "Simple FFT 3D complex failed: " << err_str << std::endl;
            return false;
        }
    }
    std::cout << "Simple 3D FFT for complex data: execution time for "
              << numFFTLoops3D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;


    // Measure the execution time for FFTW3
    // 1) FFTW 1D for real data
    fftw_plan fftwPlan = fftw_plan_dft_r2c_1d(nt, &E1_real[0],
                                              reinterpret_cast<fftw_complex*>(&G1[0]),
                                              FFTW_MEASURE);
    beginTime = clock();
    for(int i = 0; i < numFFTLoops1D; ++i) {
        fftw_execute(fftwPlan);
    }
    std::cout << "FFTW3 1D FFT for real data: execution time for "
              << numFFTLoops1D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    fftw_destroy_plan(fftwPlan);
    // 2) FFTW 1D for complex data
    fftwPlan = fftw_plan_dft_1d(nt, reinterpret_cast<fftw_complex*>(&E1_complex[0]),
                                reinterpret_cast<fftw_complex*>(&G1[0]),
                                FFTW_FORWARD, FFTW_MEASURE);
    beginTime = clock();
    for(int i = 0; i < numFFTLoops1D; ++i) {
        fftw_execute(fftwPlan);
    }
    std::cout << "FFTW3 1D FFT for complex data: execution time for "
              << numFFTLoops1D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    fftw_destroy_plan(fftwPlan);
    // 3) FFTW 2D for real data
    // NOTE: I can't pass my data to FFTW in its original form, it causes runtime errors,
    //       so I'm allocating another buffer array and copying my data twice -
    //       before and after the FFT. And yes, I'm including the time it takes
    //       into the measurement because I'm measuring the time to get the job done,
    //       not the time of some function being running.
    beginTime = clock();
    real_type*    twoDimRealArray = (real_type*)(fftw_malloc(nt*nx*sizeof(real_type)));
    fftw_complex* twoDimComplexArray = (fftw_complex*)(fftw_malloc(nt*nx*sizeof(fftw_complex)));
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            *(twoDimRealArray + i * nx + j) = E2_real[i][j];
        }
    }
    fftwPlan = fftw_plan_dft_r2c_2d(nt, nx, twoDimRealArray, twoDimComplexArray,
                                    FFTW_MEASURE);
    for(int i = 0; i < numFFTLoops2D; ++i) {
        fftw_execute(fftwPlan);
    }
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            G2[i][j] = complex_type((*(twoDimComplexArray + i*nx + j))[0],
                                    (*(twoDimComplexArray + i*nx + j))[1]);
        }
    }
    std::cout << "FFTW 2D FFT for real data: execution time for "
              << numFFTLoops2D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    fftw_destroy_plan(fftwPlan);
    // 4) FFTW 2D for complex data
    beginTime = clock();
    twoDimComplexArray = (fftw_complex*)(fftw_malloc(nt*nx*sizeof(fftw_complex)));
    fftw_complex* twoDimComplexArraySpectrum = (fftw_complex*)(fftw_malloc(nt*nx*sizeof(fftw_complex)));
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            *(twoDimComplexArray + i * nx + j)[0] = std::real(E2_complex[i][j]);
            *(twoDimComplexArray + i * nx + j)[1] = std::imag(E2_complex[i][j]);
        }
    }
    fftwPlan = fftw_plan_dft_2d(nt, nx, twoDimComplexArray, twoDimComplexArraySpectrum,
                                FFTW_FORWARD, FFTW_MEASURE);
    for(int i = 0; i < numFFTLoops2D; ++i) {
        fftw_execute(fftwPlan);
    }
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            G2[i][j] = complex_type((*(twoDimComplexArraySpectrum + i*nx + j))[0],
                                    (*(twoDimComplexArraySpectrum + i*nx + j))[1]);
        }
    }
    std::cout << "FFTW 2D FFT for complex data: execution time for "
              << numFFTLoops2D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    fftw_destroy_plan(fftwPlan);
    // 5) FFTW 3D for real data
    beginTime = clock();
    real_type*    threeDimRealArray = (real_type*)(fftw_malloc(nt*nx*ny*sizeof(real_type)));
    fftw_complex* threeDimComplexArray = (fftw_complex*)(fftw_malloc(nt*nx*ny*sizeof(fftw_complex)));
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            for(size_t k = 0; k < ny; ++k) {
                *(threeDimRealArray + i * nx * ny + j * ny + k) = E3_real[i][j][k];
            }
        }
    }
    fftwPlan = fftw_plan_dft_r2c_3d(nt, nx, ny, threeDimRealArray, threeDimComplexArray,
                                    FFTW_MEASURE);
    for(int i = 0; i < numFFTLoops3D; ++i) {
        fftw_execute(fftwPlan);
    }
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            for(size_t k = 0; k < ny; ++k) {
                E3_real[i][j][k] = *(threeDimRealArray + i * nx * ny + j * ny + k);
                G3[i][j][k] = complex_type((*(threeDimComplexArray + i * nx * ny + j * ny + k))[0],
                                           (*(threeDimComplexArray + i * nx * ny + j * ny + k))[1]);
            }
        }
    }
    std::cout << "FFTW 3D FFT for real data: execution time for "
              << numFFTLoops3D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    fftw_destroy_plan(fftwPlan);
    // 6) FFTW 3D for complex data
    beginTime = clock();
    threeDimComplexArray = (fftw_complex*)(fftw_malloc(nt*nx*ny*sizeof(fftw_complex)));
    fftw_complex* threeDimComplexArraySpectrum = (fftw_complex*)(fftw_malloc(nt*nx*ny*sizeof(fftw_complex)));
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            for(size_t k = 0; k < ny; ++k) {
                *(threeDimComplexArray + i * nx * ny + j * ny + k)[0] = std::real(E3_complex[i][j][k]);
                *(threeDimComplexArray + i * nx * ny + j * ny + k)[1] = std::imag(E3_complex[i][j][k]);
            }
        }
    }
    fftwPlan = fftw_plan_dft_3d(nt, nx, ny, threeDimComplexArray, threeDimComplexArraySpectrum,
                                FFTW_FORWARD, FFTW_MEASURE);
    for(int i = 0; i < numFFTLoops3D; ++i) {
        fftw_execute(fftwPlan);
    }
    for(size_t i = 0; i < nt; ++i) {
        for(size_t j = 0; j < nx; ++j) {
            for(size_t k = 0; k < ny; ++k) {
                G3[i][j][k] = complex_type((*(threeDimComplexArraySpectrum + i * nx * ny + j * ny + k))[0],
                                           (*(threeDimComplexArraySpectrum + i * nx * ny + j * ny + k))[1]);
            }
        }
    }
    std::cout << "FFTW 3D FFT for complex data: execution time for "
              << numFFTLoops3D << " loops: " << std::setprecision(20)
              << real_type(clock() - beginTime)/CLOCKS_PER_SEC << std::endl;
    fftw_destroy_plan(fftwPlan);

    return true;

}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: benchmark-tests/benchmark_tests_fftw3.h
================================================
#ifndef __SIMPLE_FFT__BENCHMARK_TESTS_FFTW3_H
#define __SIMPLE_FFT__BENCHMARK_TESTS_FFTW3_H

namespace simple_fft {
namespace fft_test {

// Runs the FFTW3 benchmark and prints the measured execution times to
// standard output.
//
// Returns true when the benchmark ran to completion; main() in
// benchmark_tests_main.cpp treats a false result as a failure.
// NOTE(review): the exact failure conditions live in
// benchmark_tests_fftw3.cpp - confirm there before relying on them.
bool BenchmarkTestAgainstFFTW3();

}
}

#endif // __SIMPLE_FFT__BENCHMARK_TESTS_FFTW3_H


================================================
FILE: benchmark-tests/benchmark_tests_main.cpp
================================================
#include "benchmark_tests_fftw3.h"

/**
 * Entry point of the benchmark executable.
 *
 * Runs the FFTW3 benchmark when the build defines HAS_FFTW3; without that
 * macro there is nothing to execute and the program exits successfully.
 *
 * @return 0 on success (or when no benchmark is compiled in),
 *         1 when the benchmark reports failure.
 */
int main()
{
    using namespace simple_fft;
    using namespace fft_test;

#ifdef HAS_FFTW3
    if(!BenchmarkTestAgainstFFTW3()) {
        // A failure must produce a non-zero exit status. Returning `false`
        // here would convert to 0 and make the failed run look successful.
        return 1;
    }
#endif

    return 0;
}


================================================
FILE: benchmark-tests/results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/benchmark_test_multiple_transforms_GCC_release.txt
================================================
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
Simple 1D FFT for real data: execution time for 10000 loops: 0.16999999999999998446
Simple 1D FFT for complex data: execution time for 10000 loops: 0.22000000000000000111
Simple 2D FFT for real data: execution time for 500 loops: 3.1199999999999996625
Simple 2D FFT for complex data: execution time for 500 loops: 2.8699999999999996625
Simple 3D FFT for real data: execution time for 15 loops: 11.509999999999999787
Simple 3D FFT for complex data: execution time for 15 loops: 10.779999999999999361
FFTW3 1D FFT for real data: execution time for 10000 loops: 0.010000000000000000208
FFTW3 1D FFT for complex data: execution time for 10000 loops: 0
FFTW 2D FFT for real data: execution time for 500 loops: 0.040000000000000000833
FFTW 2D FFT for complex data: execution time for 500 loops: 0.080000000000000001665
FFTW 3D FFT for real data: execution time for 15 loops: 0.48999999999999999112
FFTW 3D FFT for complex data: execution time for 15 loops: 0.96999999999999997335


================================================
FILE: benchmark-tests/results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/benchmark_test_multiple_transforms_ICC_release.txt
================================================
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
Simple 1D FFT for real data: execution time for 10000 loops: 0.20999999999999999223
Simple 1D FFT for complex data: execution time for 10000 loops: 0.14999999999999999445
Simple 2D FFT for real data: execution time for 500 loops: 2.6400000000000001243
Simple 2D FFT for complex data: execution time for 500 loops: 2.7999999999999998224
Simple 3D FFT for real data: execution time for 15 loops: 10.359999999999999432
Simple 3D FFT for complex data: execution time for 15 loops: 10.419999999999999929
FFTW3 1D FFT for real data: execution time for 10000 loops: 0
FFTW3 1D FFT for complex data: execution time for 10000 loops: 0.010000000000000000208
FFTW 2D FFT for real data: execution time for 500 loops: 0.26000000000000000888
FFTW 2D FFT for complex data: execution time for 500 loops: 0.46000000000000001998
FFTW 3D FFT for real data: execution time for 15 loops: 0.56999999999999995115
FFTW 3D FFT for complex data: execution time for 15 loops: 0.95999999999999996447


================================================
FILE: benchmark-tests/results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/benchmark_test_multiple_transforms_clang_release.txt
================================================
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
Simple 1D FFT for real data: execution time for 10000 loops: 0.020000000000000000416
Simple 1D FFT for complex data: execution time for 10000 loops: 0.02999999999999999889
Simple 2D FFT for real data: execution time for 500 loops: 0.46000000000000001998
Simple 2D FFT for complex data: execution time for 500 loops: 0.46999999999999997335
Simple 3D FFT for real data: execution time for 15 loops: 1.7299999999999999822
Simple 3D FFT for complex data: execution time for 15 loops: 1.7399999999999999911
FFTW3 1D FFT for real data: execution time for 10000 loops: 0.010000000000000000208
FFTW3 1D FFT for complex data: execution time for 10000 loops: 0
FFTW 2D FFT for real data: execution time for 500 loops: 0.040000000000000000833
FFTW 2D FFT for complex data: execution time for 500 loops: 0.080000000000000001665
FFTW 3D FFT for real data: execution time for 15 loops: 0.48999999999999999112
FFTW 3D FFT for complex data: execution time for 15 loops: 0.98999999999999999112


================================================
FILE: benchmark-tests/results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/benchmark_test_single_transform_GCC_release.txt
================================================
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
Simple 1D FFT for real data: execution time for 1 loops: 0
Simple 1D FFT for complex data: execution time for 1 loops: 0
Simple 2D FFT for real data: execution time for 1 loops: 0.010000000000000000208
Simple 2D FFT for complex data: execution time for 1 loops: 0.020000000000000000416
Simple 3D FFT for real data: execution time for 1 loops: 0.71999999999999997335
Simple 3D FFT for complex data: execution time for 1 loops: 0.65999999999999992006
FFTW3 1D FFT for real data: execution time for 1 loops: 0
FFTW3 1D FFT for complex data: execution time for 1 loops: 0
FFTW 2D FFT for real data: execution time for 1 loops: 0.010000000000000000208
FFTW 2D FFT for complex data: execution time for 1 loops: 0.02999999999999999889
FFTW 3D FFT for real data: execution time for 1 loops: 0.35999999999999998668
FFTW 3D FFT for complex data: execution time for 1 loops: 0.71999999999999997335


================================================
FILE: benchmark-tests/results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/benchmark_test_single_transform_ICC_release.txt
================================================
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
Simple 1D FFT for real data: execution time for 1 loops: 0
Simple 1D FFT for complex data: execution time for 1 loops: 0
Simple 2D FFT for real data: execution time for 1 loops: 0.010000000000000000208
Simple 2D FFT for complex data: execution time for 1 loops: 0
Simple 3D FFT for real data: execution time for 1 loops: 0.60999999999999998668
Simple 3D FFT for complex data: execution time for 1 loops: 0.64000000000000001332
FFTW3 1D FFT for real data: execution time for 1 loops: 0
FFTW3 1D FFT for complex data: execution time for 1 loops: 0
FFTW 2D FFT for real data: execution time for 1 loops: 0.050000000000000002776
FFTW 2D FFT for complex data: execution time for 1 loops: 0.11999999999999999556
FFTW 3D FFT for real data: execution time for 1 loops: 1.0900000000000000799
FFTW 3D FFT for complex data: execution time for 1 loops: 0.75


================================================
FILE: benchmark-tests/results_Linux_Mint_14_x86_64_Intel_Core_i5_10_Gb_RAM/benchmark_test_single_transform_clang_release.txt
================================================
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
FFT test: creating real 1D pulse.
Done.
FFT test: creating real 2D pulse.
Done.
FFT test: creating real 3D pulse.
Done.
Simple 1D FFT for real data: execution time for 1 loops: 0
Simple 1D FFT for complex data: execution time for 1 loops: 0
Simple 2D FFT for real data: execution time for 1 loops: 0.010000000000000000208
Simple 2D FFT for complex data: execution time for 1 loops: 0
Simple 3D FFT for real data: execution time for 1 loops: 0.11000000000000000056
Simple 3D FFT for complex data: execution time for 1 loops: 0.10000000000000000555
FFTW3 1D FFT for real data: execution time for 1 loops: 0
FFTW3 1D FFT for complex data: execution time for 1 loops: 0
FFTW 2D FFT for real data: execution time for 1 loops: 0.020000000000000000416
FFTW 2D FFT for complex data: execution time for 1 loops: 0.02999999999999999889
FFTW 3D FFT for real data: execution time for 1 loops: 0.3499999999999999778
FFTW 3D FFT for complex data: execution time for 1 loops: 0.73999999999999999112


================================================
FILE: include/simple_fft/check_fft.hpp
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

#ifndef __SIMPLE_FFT__CHECK_FFT_HPP__
#define __SIMPLE_FFT__CHECK_FFT_HPP__

#include "fft_settings.h"
#include "error_handling.hpp"
#include "copy_array.hpp"
#include <cstddef>
#include <cmath>
#include <numeric>

using std::size_t;

namespace simple_fft {
namespace check_fft_private {

// Selects which comparison CCheckFFT::check_fft performs between the
// "before" and "after" arrays.
enum CheckMode
{
    // Compare the sums of squared magnitudes, with the "after" sum divided by
    // the total number of elements.
    CHECK_FFT_PARSEVAL,
    // Compare the sums of squared magnitudes as they are (no normalization).
    CHECK_FFT_ENERGY,
    // Compare the arrays element by element via the maximum absolute error and
    // the relative error at that element.
    CHECK_FFT_EQUALITY
};

template <class TArray1D, class TComplexArray1D>
void getMaxAbsoluteAndRelativeErrorNorms(const TArray1D & array1,
                                         const TComplexArray1D & array2, const size_t size,
                                         real_type & max_absolute_error_norm,
                                         real_type & max_relative_error_norm)
{
    using std::abs;

    real_type current_error;

    // NOTE: no parallelization here, it is a completely sequential loop!
    for(size_t i = 0; i < size; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
        current_error = abs(array1[i] - array2[i]);
#else
        current_error = abs(array1(i) - array2(i));
#endif
        if (current_error > max_absolute_error_norm) {
            max_absolute_error_norm = current_error;
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            if (abs(array1[i]) > abs(array2[i])) {
                max_relative_error_norm = (abs(array1[i]) > 1e-20
                                           ? max_absolute_error_norm / abs(array1[i])
                                           : 0.0);
            }
            else {
                max_relative_error_norm = (abs(array2[i]) > 1e-20
                                           ? max_absolute_error_norm / abs(array2[i])
                                           : 0.0);
            }
#else
            if (abs(array1(i)) > abs(array2(i))) {
                max_relative_error_norm = (abs(array1(i)) > 1e-20
                                           ? max_absolute_error_norm / abs(array1(i))
                                           : 0.0);
            }
            else {
                max_relative_error_norm = (abs(array2(i)) > 1e-20
                                           ? max_absolute_error_norm / abs(array2(i))
                                           : 0.0);
            }
#endif
        }
    }
}

template <class TArray2D, class TComplexArray2D>
void getMaxAbsoluteAndRelativeErrorNorms(const TArray2D & array1,
                                         const TComplexArray2D & array2,
                                         const size_t size1, const size_t size2,
                                         real_type & max_absolute_error_norm,
                                         real_type & max_relative_error_norm)
{
    using std::abs;

    real_type current_error;

    // NOTE: no parallelization here, it is a completely sequential loop!
    for(int i = 0; i < static_cast<int>(size1); ++i) {
        for(int j = 0; j < static_cast<int>(size2); ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            current_error = abs(array1[i][j] - array2[i][j]);
#else
            current_error = abs(array1(i,j) - array2(i,j));
#endif
            if (current_error > max_absolute_error_norm) {
                max_absolute_error_norm = current_error;
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                if (abs(array1[i][j]) > abs(array2[i][j])) {
                    max_relative_error_norm = (abs(array1[i][j]) > 1e-20
                                               ? max_absolute_error_norm / abs(array1[i][j])
                                               : 0.0);
                }
                else {
                    max_relative_error_norm = (abs(array2[i][j]) > 1e-20
                                               ? max_absolute_error_norm / abs(array2[i][j])
                                               : 0.0);
                }
#else
                if (abs(array1(i,j)) > abs(array2(i,j))) {
                    max_relative_error_norm = (abs(array1(i,j)) > 1e-20
                                               ? max_absolute_error_norm / abs(array1(i,j))
                                               : 0.0);
                }
                else {
                    max_relative_error_norm = (abs(array2(i,j)) > 1e-20
                                               ? max_absolute_error_norm / abs(array2(i,j))
                                               : 0.0);
                }
#endif
            }
        }
    }
}

template <class TArray3D, class TComplexArray3D>
void getMaxAbsoluteAndRelativeErrorNorms(const TArray3D & array1, const TComplexArray3D & array2,
                                         const size_t size1, const size_t size2,
                                         const size_t size3, real_type & max_absolute_error_norm,
                                         real_type & max_relative_error_norm)
{
    using std::abs;

    real_type current_error;

    // NOTE: no parallelization here, it is a completely sequential loop!
    for(int i = 0; i < static_cast<int>(size1); ++i) {
        for(int j = 0; j < static_cast<int>(size2); ++j) {
            for(int k = 0; k < static_cast<int>(size3); ++k) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                current_error = abs(array1[i][j][k] - array2[i][j][k]);
#else
                current_error = abs(array1(i,j,k) - array2(i,j,k));
#endif
                if (current_error > max_absolute_error_norm) {
                    max_absolute_error_norm = current_error;
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    if (abs(array1[i][j][k]) > abs(array2[i][j][k])) {
                        max_relative_error_norm = (abs(array1[i][j][k]) > 1e-20
                                                   ? max_absolute_error_norm / abs(array1[i][j][k])
                                                   : 0.0);
                    }
                    else {
                        max_relative_error_norm = (abs(array2[i][j][k]) > 1e-20
                                                   ? max_absolute_error_norm / abs(array2[i][j][k])
                                                   : 0.0);
                    }
#else
                    if (abs(array1(i,j,k)) > abs(array2(i,j,k))) {
                        max_relative_error_norm = (abs(array1(i,j,k)) > 1e-20
                                                   ? max_absolute_error_norm / abs(array1(i,j,k))
                                                   : 0.0);
                    }
                    else {
                        max_relative_error_norm = (abs(array2(i,j,k)) > 1e-20
                                                   ? max_absolute_error_norm / abs(array2(i,j,k))
                                                   : 0.0);
                    }
#endif
                }
            }
        }
    }
}

/**
 * Adds abs(x * x) for every element x of a 1D array to a starting value.
 *
 * @param array  array indexed with [] or () depending on
 *               __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
 * @param size   number of elements to visit
 * @param init   value the accumulation starts from
 * @return init plus the sum of abs(x * x) over the first `size` elements
 */
template <class TArray1D>
real_type squareAbsAccumulate(const TArray1D & array, const size_t size,
                              const real_type init)
{
    using std::abs;

    // The loop counter is a signed int (as is the bound it is compared with).
    const int count = static_cast<int>(size);
    real_type total = init;

    // The reduction is parallelised only when OpenMP is requested and the
    // compiler is not clang.
#if defined(__USE_OPENMP) && !defined(__clang__)
#pragma omp parallel for reduction(+:total)
#endif
    for(int idx = 0; idx < count; ++idx)
    {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
        total += abs(array[idx] * array[idx]);
#else
        total += abs(array(idx) * array(idx));
#endif
    }

    return total;
}

/**
 * Adds abs(x * x) for every element x of a 2D array to a starting value.
 *
 * @param array  array indexed with [i][j] or (i,j) depending on
 *               __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
 * @param size1  extent of the first dimension
 * @param size2  extent of the second dimension
 * @param init   value the accumulation starts from
 * @return init plus the sum of abs(x * x) over all size1 * size2 elements
 */
template <class TArray2D>
real_type squareAbsAccumulate(const TArray2D & array, const size_t size1,
                              const size_t size2, const real_type init)
{
    using std::abs;

    // The loop counters are signed ints (as are the bounds they are compared with).
    const int rows = static_cast<int>(size1);
    const int cols = static_cast<int>(size2);
    real_type total = init;

    // Only the outer loop is distributed across threads, and only when OpenMP
    // is requested and the compiler is not clang.
#if defined(__USE_OPENMP) && !defined(__clang__)
#pragma omp parallel for reduction(+:total)
#endif
    for(int row = 0; row < rows; ++row)
    {
        for(int col = 0; col < cols; ++col)
        {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            total += abs(array[row][col] * array[row][col]);
#else
            total += abs(array(row,col) * array(row,col));
#endif
        }
    }

    return total;
}

/**
 * Adds abs(x * x) for every element x of a 3D array to a starting value.
 *
 * @param array  array indexed with [i][j][k] or (i,j,k) depending on
 *               __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
 * @param size1  extent of the first dimension
 * @param size2  extent of the second dimension
 * @param size3  extent of the third dimension
 * @param init   value the accumulation starts from
 * @return init plus the sum of abs(x * x) over all size1 * size2 * size3 elements
 */
template <class TArray3D>
real_type squareAbsAccumulate(const TArray3D & array, const size_t size1,
                              const size_t size2, const size_t size3,
                              const real_type init)
{
    using std::abs;

    // The loop counters are signed ints (as are the bounds they are compared with).
    const int extent1 = static_cast<int>(size1);
    const int extent2 = static_cast<int>(size2);
    const int extent3 = static_cast<int>(size3);
    real_type total = init;

    // Only the outermost loop is distributed across threads, and only when
    // OpenMP is requested and the compiler is not clang.
#if defined(__USE_OPENMP) && !defined(__clang__)
#pragma omp parallel for reduction(+:total)
#endif
    for(int a = 0; a < extent1; ++a)
    {
        for(int b = 0; b < extent2; ++b)
        {
            for(int c = 0; c < extent3; ++c)
            {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                total += abs(array[a][b][c] * array[a][b][c]);
#else
                total += abs(array(a,b,c) * array(a,b,c));
#endif
            }
        }
    }

    return total;
}

// Generic (primary) template of the CCheckFFT struct. It is intentionally
// empty: the actual check_fft implementations live in the partial
// specializations for NumDims equal to 1, 2 and 3, so any other number of
// dimensions has no check_fft member to call.
// TArray can be of either real or complex type.
// The technique is similar to the one applied for the CFFT struct.
template <class TArray, class TComplexArray, int NumDims>
struct CCheckFFT
{};

// CCheckFFT specialization for 1D arrays.
template <class TArray1D, class TComplexArray1D>
struct CCheckFFT<TArray1D,TComplexArray1D,1>
{
    /**
     * Compares a 1D array with its transformed counterpart.
     *
     * @param data_before        array before the transform(s)
     * @param data_after         array after the transform(s)
     * @param size               number of elements in both arrays
     * @param relative_tolerance largest acceptable relative error
     * @param discrepancy        output: |sum_before - sum_after| in the Parseval and
     *                           energy modes, the maximum absolute element-wise
     *                           error in the equality mode
     * @param check_mode         which comparison to perform
     * @param error_description  set through GetErrorDescription when the check fails
     * @return true when the check passes; false when size is zero, the mode is
     *         unknown or the relative error is too large
     */
    static bool check_fft(const TArray1D & data_before,
                          const TComplexArray1D & data_after,
                          const size_t size, const real_type relative_tolerance,
                          real_type & discrepancy, const CheckMode check_mode,
                          const char *& error_description)
    {
        using namespace error_handling;

        if(0 == size) {
            GetErrorDescription(EC_NUM_OF_ELEMS_IS_ZERO, error_description);
            return false;
        }

        if ( (CHECK_FFT_PARSEVAL != check_mode) &&
             (CHECK_FFT_ENERGY   != check_mode) &&
             (CHECK_FFT_EQUALITY != check_mode) )
        {
            GetErrorDescription(EC_WRONG_CHECK_FFT_MODE, error_description);
            return false;
        }

        if (CHECK_FFT_EQUALITY != check_mode)
        {
            real_type sum_before = squareAbsAccumulate<TArray1D>(data_before, size, 0.0);
            real_type sum_after  = squareAbsAccumulate<TComplexArray1D>(data_after, size, 0.0);

            // Parseval mode normalises the "after" sum by the element count.
            if (CHECK_FFT_PARSEVAL == check_mode) {
                sum_after /= size;
            }

            using std::abs;

            discrepancy = abs(sum_before - sum_after);

            // Guard the denominator so that an all-zero input does not divide by zero.
            if (discrepancy / ((sum_before < 1e-20) ? (sum_before + 1e-20) : sum_before) > relative_tolerance) {
                GetErrorDescription(EC_RELATIVE_ERROR_TOO_LARGE, error_description);
                return false;
            }
            else {
                return true;
            }
        }
        else {
            // Start both norms from zero so that the outcome never depends on an
            // indeterminate local or on whatever the caller left in `discrepancy`.
            real_type relative_error = 0.0;
            discrepancy = 0.0;
            getMaxAbsoluteAndRelativeErrorNorms(data_before, data_after, size,
                                                discrepancy, relative_error);
            if (relative_error < relative_tolerance) {
                return true;
            }
            else {
                GetErrorDescription(EC_RELATIVE_ERROR_TOO_LARGE, error_description);
                return false;
            }
        }
    }
};

// CCheckFFT specialization for 2D arrays.
template <class TArray2D, class TComplexArray2D>
struct CCheckFFT<TArray2D,TComplexArray2D,2>
{
    /**
     * Compares a 2D array with its transformed counterpart.
     *
     * @param data_before        array before the transform(s)
     * @param data_after         array after the transform(s)
     * @param size1              extent of the first dimension
     * @param size2              extent of the second dimension
     * @param relative_tolerance largest acceptable relative error
     * @param discrepancy        output: |sum_before - sum_after| in the Parseval and
     *                           energy modes, the maximum absolute element-wise
     *                           error in the equality mode
     * @param check_mode         which comparison to perform
     * @param error_description  set through GetErrorDescription when the check fails
     * @return true when the check passes; false when a size is zero, the mode is
     *         unknown or the relative error is too large
     */
    static bool check_fft(const TArray2D & data_before,
                          const TComplexArray2D & data_after,
                          const size_t size1, const size_t size2,
                          const real_type relative_tolerance, real_type & discrepancy,
                          const CheckMode check_mode, const char *& error_description)
    {
        using namespace error_handling;

        if( (0 == size1) || (0 == size2) ) {
            GetErrorDescription(EC_NUM_OF_ELEMS_IS_ZERO, error_description);
            return false;
        }

        if ( (CHECK_FFT_PARSEVAL != check_mode) &&
             (CHECK_FFT_ENERGY   != check_mode) &&
             (CHECK_FFT_EQUALITY != check_mode) )
        {
            GetErrorDescription(EC_WRONG_CHECK_FFT_MODE, error_description);
            return false;
        }

        if (CHECK_FFT_EQUALITY != check_mode)
        {
            real_type sum_before = squareAbsAccumulate<TArray2D>(data_before, size1, size2, 0.0);
            real_type sum_after  = squareAbsAccumulate<TComplexArray2D>(data_after, size1, size2, 0.0);

            // Parseval mode normalises the "after" sum by the element count.
            if (CHECK_FFT_PARSEVAL == check_mode) {
                sum_after /= size1 * size2;
            }

            using std::abs;

            discrepancy = abs(sum_before - sum_after);

            // Guard the denominator so that an all-zero input does not divide by zero.
            if (discrepancy / ((sum_before < 1e-20) ? (sum_before + 1e-20) : sum_before) > relative_tolerance) {
                GetErrorDescription(EC_RELATIVE_ERROR_TOO_LARGE, error_description);
                return false;
            }
            else {
                return true;
            }
        }
        else {
            // Start both norms from zero so that the outcome never depends on an
            // indeterminate local or on whatever the caller left in `discrepancy`.
            real_type relative_error = 0.0;
            discrepancy = 0.0;
            getMaxAbsoluteAndRelativeErrorNorms(data_before, data_after, size1,
                                                size2, discrepancy, relative_error);
            if (relative_error < relative_tolerance) {
                return true;
            }
            else {
                GetErrorDescription(EC_RELATIVE_ERROR_TOO_LARGE, error_description);
                return false;
            }
        }
    }
};

// CCheckFFT specialization for 3D arrays.
template <class TArray3D, class TComplexArray3D>
struct CCheckFFT<TArray3D,TComplexArray3D,3>
{
    /**
     * Compares a 3D array with its transformed counterpart.
     *
     * @param data_before        array before the transform(s)
     * @param data_after         array after the transform(s)
     * @param size1              extent of the first dimension
     * @param size2              extent of the second dimension
     * @param size3              extent of the third dimension
     * @param relative_tolerance largest acceptable relative error
     * @param discrepancy        output: |sum_before - sum_after| in the Parseval and
     *                           energy modes, the maximum absolute element-wise
     *                           error in the equality mode
     * @param check_mode         which comparison to perform
     * @param error_description  set through GetErrorDescription when the check fails
     * @return true when the check passes; false when a size is zero, the mode is
     *         unknown or the relative error is too large
     */
    static bool check_fft(const TArray3D & data_before,
                          const TComplexArray3D & data_after,
                          const size_t size1, const size_t size2, const size_t size3,
                          const real_type relative_tolerance, real_type & discrepancy,
                          const CheckMode check_mode, const char *& error_description)
    {
        using namespace error_handling;

        if( (0 == size1) || (0 == size2) || (0 == size3) ) {
            GetErrorDescription(EC_NUM_OF_ELEMS_IS_ZERO, error_description);
            return false;
        }

        if ( (CHECK_FFT_PARSEVAL != check_mode) &&
             (CHECK_FFT_ENERGY   != check_mode) &&
             (CHECK_FFT_EQUALITY != check_mode) )
        {
            GetErrorDescription(EC_WRONG_CHECK_FFT_MODE, error_description);
            return false;
        }

        if (CHECK_FFT_EQUALITY != check_mode)
        {
            real_type sum_before = squareAbsAccumulate<TArray3D>(data_before, size1, size2, size3, 0.0);
            real_type sum_after  = squareAbsAccumulate<TComplexArray3D>(data_after, size1, size2, size3, 0.0);

            // Parseval mode normalises the "after" sum by the element count.
            if (CHECK_FFT_PARSEVAL == check_mode) {
                sum_after /= size1 * size2 * size3;
            }

            using std::abs;

            discrepancy = abs(sum_before - sum_after);

            // Guard the denominator so that an all-zero input does not divide by zero.
            if (discrepancy / ((sum_before < 1e-20) ? (sum_before + 1e-20) : sum_before) > relative_tolerance) {
                GetErrorDescription(EC_RELATIVE_ERROR_TOO_LARGE, error_description);
                return false;
            }
            else {
                return true;
            }
        }
        else {
            // Start both norms from zero so that the outcome never depends on an
            // indeterminate local or on whatever the caller left in `discrepancy`.
            real_type relative_error = 0.0;
            discrepancy = 0.0;
            getMaxAbsoluteAndRelativeErrorNorms(data_before, data_after, size1,
                                                size2, size3, discrepancy, relative_error);
            if (relative_error < relative_tolerance) {
                return true;
            }
            else {
                GetErrorDescription(EC_RELATIVE_ERROR_TOO_LARGE, error_description);
                return false;
            }
        }
    }
};

} // namespace check_fft_private

namespace check_fft {

template <class TArray1D, class TComplexArray1D>
bool checkParsevalTheorem(const TArray1D & data_before_FFT,
                          const TComplexArray1D & data_after_FFT,
                          const size_t size, const real_type relative_tolerance,
                          real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray1D,TComplexArray1D,1>::check_fft(data_before_FFT,
                                             data_after_FFT, size, relative_tolerance,
                                             discrepancy, check_fft_private::CHECK_FFT_PARSEVAL,
                                             error_description);
}

template <class TArray2D, class TComplexArray2D>
bool checkParsevalTheorem(const TArray2D & data_before_FFT,
                          const TComplexArray2D & data_after_FFT,
                          const size_t size1, const size_t size2,
                          const real_type relative_tolerance,
                          real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray2D,TComplexArray2D,2>::check_fft(data_before_FFT,
                                             data_after_FFT, size1, size2, relative_tolerance,
                                             discrepancy, check_fft_private::CHECK_FFT_PARSEVAL,
                                             error_description);
}

template <class TArray3D, class TComplexArray3D>
bool checkParsevalTheorem(const TArray3D & data_before_FFT,
                          const TComplexArray3D & data_after_FFT,
                          const size_t size1, const size_t size2, const size_t size3,
                          const real_type relative_tolerance, real_type & discrepancy,
                          const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray3D,TComplexArray3D,3>::check_fft(data_before_FFT,
                                                  data_after_FFT, size1, size2, size3,
                                                  relative_tolerance, discrepancy,
                                                  check_fft_private::CHECK_FFT_PARSEVAL,
                                                  error_description);
}

template <class TArray1D, class TComplexArray1D>
bool checkEnergyConservation(const TArray1D & data_before_FFT,
                             const TComplexArray1D & data_after_FFT_and_IFFT,
                             const size_t size, const real_type relative_tolerance,
                             real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray1D,TComplexArray1D,1>::check_fft(data_before_FFT,
                                    data_after_FFT_and_IFFT, size, relative_tolerance,
                                    discrepancy, check_fft_private::CHECK_FFT_ENERGY,
                                    error_description);
}

template <class TArray2D, class TComplexArray2D>
bool checkEnergyConservation(const TArray2D & data_before_FFT,
                             const TComplexArray2D & data_after_FFT_and_IFFT,
                             const size_t size1, const size_t size2,
                             const real_type relative_tolerance,
                             real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray2D,TComplexArray2D,2>::check_fft(data_before_FFT,
                                                data_after_FFT_and_IFFT, size1, size2,
                                                relative_tolerance, discrepancy,
                                                check_fft_private::CHECK_FFT_ENERGY,
                                                error_description);
}

template <class TArray3D, class TComplexArray3D>
bool checkEnergyConservation(const TArray3D & data_before_FFT,
                             const TComplexArray3D & data_after_FFT_and_IFFT,
                             const size_t size1, const size_t size2, const size_t size3,
                             const real_type relative_tolerance, real_type & discrepancy,
                             const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray3D,TComplexArray3D,3>::check_fft(data_before_FFT,
                                                data_after_FFT_and_IFFT, size1, size2,
                                                size3, relative_tolerance, discrepancy,
                                                check_fft_private::CHECK_FFT_ENERGY,
                                                error_description);
}

template <class TArray1D, class TComplexArray1D>
bool checkEquality(const TArray1D & data_before_FFT,
                   const TComplexArray1D & data_after_FFT_and_IFFT,
                   const size_t size, const real_type relative_tolerance,
                   real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray1D,TComplexArray1D,1>::check_fft(data_before_FFT,
                                             data_after_FFT_and_IFFT, size, relative_tolerance,
                                             discrepancy, check_fft_private::CHECK_FFT_EQUALITY,
                                             error_description);
}

template <class TArray2D, class TComplexArray2D>
bool checkEquality(const TArray2D & data_before_FFT,
                   const TComplexArray2D & data_after_FFT_and_IFFT, const size_t size1,
                   const size_t size2, const real_type relative_tolerance,
                   real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray2D,TComplexArray2D,2>::check_fft(data_before_FFT,
                                                         data_after_FFT_and_IFFT, size1, size2,
                                                         relative_tolerance, discrepancy,
                                                         check_fft_private::CHECK_FFT_EQUALITY,
                                                         error_description);
}

template <class TArray3D, class TComplexArray3D>
bool checkEquality(const TArray3D & data_before_FFT,
                   const TComplexArray3D & data_after_FFT_and_IFFT, const size_t size1,
                   const size_t size2, const size_t size3, const real_type relative_tolerance,
                   real_type & discrepancy, const char *& error_description)
{
    return check_fft_private::CCheckFFT<TArray3D,TComplexArray3D,3>::check_fft(data_before_FFT,
                                                         data_after_FFT_and_IFFT, size1, size2,
                                                         size3, relative_tolerance, discrepancy,
                                                         check_fft_private::CHECK_FFT_EQUALITY,
                                                         error_description);
}

} // namespace check_fft
} // namespace simple_fft

#endif // __SIMPLE_FFT__CHECK_FFT_HPP__


================================================
FILE: include/simple_fft/copy_array.hpp
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

#ifndef __SIMPLE_FFT__COPY_ARRAY_HPP
#define __SIMPLE_FFT__COPY_ARRAY_HPP

#include "fft_settings.h"
#include "error_handling.hpp"
#include <cstddef>

using std::size_t;

namespace simple_fft {
namespace copy_array {

// Copies the first `size` elements of data_from into data_to, one by one.
// Both arrays have the same type. Elements are accessed with operator[] when
// __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR is defined and with
// operator() otherwise. The loop is an OpenMP parallel loop when __USE_OPENMP
// is defined and the compiler is not clang.
template <class TComplexArray1D>
void copyArray(const TComplexArray1D & data_from, TComplexArray1D & data_to,
               const size_t size)
{
    // the loop counter is a signed int, so the size is converted once up front
    const int n_elems = static_cast<int>(size);

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int idx = 0; idx < n_elems; ++idx) {
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
        data_to(idx) = data_from(idx);
#else
        data_to[idx] = data_from[idx];
#endif
    }
}

// Copies the first `size` real elements of data_from into the complex array
// data_to, giving every copied element a zero imaginary part.
//
// NOTE: complex_type has to be constructible as "complex_type(real, imag)",
// where both arguments are of the real type.
template <class TComplexArray1D, class TRealArray1D>
void copyArray(const TRealArray1D & data_from, TComplexArray1D & data_to,
               const size_t size)
{
    // the loop counter is a signed int, so the size is converted once up front
    const int n_elems = static_cast<int>(size);

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int idx = 0; idx < n_elems; ++idx) {
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
        data_to(idx) = complex_type(data_from(idx), 0.0);
#else
        data_to[idx] = complex_type(data_from[idx], 0.0);
#endif
    }
}

// Copies the size1 x size2 leading part of the 2D array data_from into data_to.
// Both arrays have the same type. Elements are accessed as [i][j] when
// __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR is defined and as (i,j)
// otherwise. Only the outer loop carries the OpenMP pragma (active when
// __USE_OPENMP is defined and the compiler is not clang).
template <class TComplexArray2D>
void copyArray(const TComplexArray2D & data_from, TComplexArray2D & data_to,
               const size_t size1, const size_t size2)
{
    const int n_rows = static_cast<int>(size1);
    const int n_cols = static_cast<int>(size2);

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int row = 0; row < n_rows; ++row) {
        for(int col = 0; col < n_cols; ++col) {
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            data_to(row,col) = data_from(row,col);
#else
            data_to[row][col] = data_from[row][col];
#endif
        }
    }
}

// Copies the size1 x size2 leading part of the real 2D array data_from into the
// complex 2D array data_to, giving every copied element a zero imaginary part.
//
// NOTE: complex_type has to be constructible as "complex_type(real, imag)",
// where both arguments are of the real type.
template <class TComplexArray2D, class TRealArray2D>
void copyArray(const TRealArray2D & data_from, TComplexArray2D & data_to,
               const size_t size1, const size_t size2)
{
    const int n_rows = static_cast<int>(size1);
    const int n_cols = static_cast<int>(size2);

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int row = 0; row < n_rows; ++row) {
        for(int col = 0; col < n_cols; ++col) {
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            data_to(row,col) = complex_type(data_from(row,col), 0.0);
#else
            data_to[row][col] = complex_type(data_from[row][col], 0.0);
#endif
        }
    }
}

// Copies the size1 x size2 x size3 leading part of the 3D array data_from into
// data_to. Both arrays have the same type. Elements are accessed as [i][j][k]
// when __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR is defined and as
// (i,j,k) otherwise. Only the outermost loop carries the OpenMP pragma (active
// when __USE_OPENMP is defined and the compiler is not clang).
template <class TComplexArray3D>
void copyArray(const TComplexArray3D & data_from, TComplexArray3D & data_to,
               const size_t size1, const size_t size2, const size_t size3)
{
    const int extent1 = static_cast<int>(size1);
    const int extent2 = static_cast<int>(size2);
    const int extent3 = static_cast<int>(size3);

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int a = 0; a < extent1; ++a) {
        for(int b = 0; b < extent2; ++b) {
            for(int c = 0; c < extent3; ++c) {
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                data_to(a,b,c) = data_from(a,b,c);
#else
                data_to[a][b][c] = data_from[a][b][c];
#endif
            }
        }
    }
}

// Copies the size1 x size2 x size3 leading part of the real 3D array data_from
// into the complex 3D array data_to, giving every copied element a zero
// imaginary part.
//
// NOTE: complex_type has to be constructible as "complex_type(real, imag)",
// where both arguments are of the real type.
template <class TComplexArray3D, class TRealArray3D>
void copyArray(const TRealArray3D & data_from, TComplexArray3D & data_to,
               const size_t size1, const size_t size2, const size_t size3)
{
    const int extent1 = static_cast<int>(size1);
    const int extent2 = static_cast<int>(size2);
    const int extent3 = static_cast<int>(size3);

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int a = 0; a < extent1; ++a) {
        for(int b = 0; b < extent2; ++b) {
            for(int c = 0; c < extent3; ++c) {
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                data_to(a,b,c) = complex_type(data_from(a,b,c), 0.0);
#else
                data_to[a][b][c] = complex_type(data_from[a][b][c], 0.0);
#endif
            }
        }
    }
}

} // namespace copy_array
} // namespace simple_fft

#endif // __SIMPLE_FFT__COPY_ARRAY_HPP


================================================
FILE: include/simple_fft/error_handling.hpp
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

#ifndef __SIMPLE_FFT__ERROR_HANDLING_HPP
#define __SIMPLE_FFT__ERROR_HANDLING_HPP

namespace simple_fft {
namespace error_handling {

// Error codes reported by the library; GetErrorDescription() below maps each
// of them to a human-readable message.
enum EC_SimpleFFT
{
    EC_SUCCESS = 0,
    EC_UNSUPPORTED_DIMENSIONALITY,
    EC_WRONG_FFT_DIRECTION,
    EC_ONE_OF_DIMS_ISNT_POWER_OF_TWO,
    EC_NUM_OF_ELEMS_IS_ZERO,
    EC_WRONG_CHECK_FFT_MODE,
    EC_RELATIVE_ERROR_TOO_LARGE
};

// Points error_description at a static, null-terminated message describing
// error_code. The message is a string literal, so the caller must not free or
// modify it. Values outside of EC_SimpleFFT yield "Unknown error".
inline void GetErrorDescription(const EC_SimpleFFT error_code,
                                const char *& error_description)
{
    switch(error_code)
    {
    case EC_SUCCESS:
        error_description = "Calculation was successful!";
        break;
    case EC_UNSUPPORTED_DIMENSIONALITY:
        error_description = "Unsupported dimensionality: currently only 1D, 2D "
                            "and 3D arrays are supported";
        break;
    case EC_WRONG_FFT_DIRECTION:
        error_description = "Wrong direction for FFT was specified";
        break;
    case EC_ONE_OF_DIMS_ISNT_POWER_OF_TWO:
        error_description = "Unsupported dimensionality: one of dimensions is not "
                            "a power of 2";
        break;
    case EC_NUM_OF_ELEMS_IS_ZERO:
        error_description = "Number of elements for FFT or IFFT is zero!";
        break;
    case EC_WRONG_CHECK_FFT_MODE:
        // The message lists all three check modes and closes its parenthesis
        // (the previous text omitted the equality check and the ')').
        error_description = "Wrong check FFT mode was specified (should be one of: "
                            "Parseval theorem, energy conservation or equality check)";
        break;
    case EC_RELATIVE_ERROR_TOO_LARGE:
        error_description = "Relative error returned by FFT test exceeds specified "
                            "relative tolerance";
        break;
    default:
        error_description = "Unknown error";
        break;
    }
}

} // namespace error_handling
} // namespace simple_fft

#endif // __SIMPLE_FFT__ERROR_HANDLING_HPP


================================================
FILE: include/simple_fft/fft.h
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

#ifndef __SIMPLE_FFT__FFT_H__
#define __SIMPLE_FFT__FFT_H__

#include <cstddef>

using std::size_t;

/// The public API
namespace simple_fft {

/// FFT and IFFT functions

// Conventions shared by all declarations below (definitions are in fft.hpp):
//  - every function returns the result of the corresponding
//    impl::CFFT<...>::FFT_inplace call; the 1D implementation returns false
//    and points error_description at a static message when the size is not
//    a power of two, and returns true otherwise;
//  - size / size1 / size2 / size3 are the numbers of elements along each
//    dimension of the array(s);
//  - the "not-in-place" overloads first copy data_in into data_out and then
//    transform data_out in place, so data_in is left unmodified.

// in-place, complex, forward: `data` is both the input and the output
template <class TComplexArray1D>
bool FFT(TComplexArray1D & data, const size_t size, const char *& error_description);

template <class TComplexArray2D>
bool FFT(TComplexArray2D & data, const size_t size1, const size_t size2,
         const char *& error_description);

template <class TComplexArray3D>
bool FFT(TComplexArray3D & data, const size_t size1, const size_t size2, const size_t size3,
         const char *& error_description);

// in-place, complex, inverse: `data` is both the input and the output
template <class TComplexArray1D>
bool IFFT(TComplexArray1D & data, const size_t size, const char *& error_description);

template <class TComplexArray2D>
bool IFFT(TComplexArray2D & data, const size_t size1, const size_t size2,
          const char *& error_description);

template <class TComplexArray3D>
bool IFFT(TComplexArray3D & data, const size_t size1, const size_t size2, const size_t size3,
          const char *& error_description);

// not-in-place, complex, forward: the spectrum of data_in is written to data_out
template <class TComplexArray1D>
bool FFT(const TComplexArray1D & data_in, TComplexArray1D & data_out,
         const size_t size, const char *& error_description);

template <class TComplexArray2D>
bool FFT(const TComplexArray2D & data_in, TComplexArray2D & data_out,
         const size_t size1, const size_t size2, const char *& error_description);

template <class TComplexArray3D>
bool FFT(const TComplexArray3D & data_in, TComplexArray3D & data_out,
         const size_t size1, const size_t size2, const size_t size3,
         const char *& error_description);

// not-in-place, complex, inverse: the inverse transform of data_in is written to data_out
template <class TComplexArray1D>
bool IFFT(const TComplexArray1D & data_in, TComplexArray1D & data_out,
          const size_t size, const char *& error_description);

template <class TComplexArray2D>
bool IFFT(const TComplexArray2D & data_in, TComplexArray2D & data_out,
          const size_t size1, const size_t size2, const char *& error_description);

template <class TComplexArray3D>
bool IFFT(const TComplexArray3D & data_in, TComplexArray3D & data_out,
          const size_t size1, const size_t size2, const size_t size3,
          const char *& error_description);

// not-in-place, real, forward: the real input is copied into the complex
// output with zero imaginary parts before being transformed
template <class TRealArray1D, class TComplexArray1D>
bool FFT(const TRealArray1D & data_in, TComplexArray1D & data_out,
         const size_t size, const char *& error_description);

template <class TRealArray2D, class TComplexArray2D>
bool FFT(const TRealArray2D & data_in, TComplexArray2D & data_out,
         const size_t size1, const size_t size2, const char *& error_description);

template <class TRealArray3D, class TComplexArray3D>
bool FFT(const TRealArray3D & data_in, TComplexArray3D & data_out,
         const size_t size1, const size_t size2, const size_t size3,
         const char *& error_description);

// NOTE: There is no inverse transform from a complex spectrum to a real signal
// because round-off errors during computation of the inverse FFT lead to the
// appearance of imaginary components in the signal, even though they are small
// in absolute value. These can be ignored, but the author of this file thinks
// adding such a function would be methodologically wrong: looking at the complex
// result, you can estimate the magnitude of the spurious imaginary part. Otherwise
// you may never know that the IFFT produces too large imaginary values due to,
// for example, too small a grid size.

} // namespace simple_fft

#endif // __SIMPLE_FFT__FFT_H__

#include "fft.hpp"


================================================
FILE: include/simple_fft/fft.hpp
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

#ifndef __SIMPLE_FFT__FFT_HPP__
#define __SIMPLE_FFT__FFT_HPP__

#include "copy_array.hpp"
#include "fft_impl.hpp"

namespace simple_fft {

// in-place, complex, forward
template <class TComplexArray1D>
bool FFT(TComplexArray1D & data, const size_t size, const char *& error_description)
{
    return impl::CFFT<TComplexArray1D,1>::FFT_inplace(data, size, impl::FFT_FORWARD,
                                                      error_description);
}

template <class TComplexArray2D>
bool FFT(TComplexArray2D & data, const size_t size1, const size_t size2,
         const char *& error_description)
{
    return impl::CFFT<TComplexArray2D,2>::FFT_inplace(data, size1, size2, impl::FFT_FORWARD,
                                                      error_description);
}

template <class TComplexArray3D>
bool FFT(TComplexArray3D & data, const size_t size1, const size_t size2, const size_t size3,
         const char *& error_description)
{
    return impl::CFFT<TComplexArray3D,3>::FFT_inplace(data, size1, size2, size3,
                                                      impl::FFT_FORWARD,
                                                      error_description);
}

// in-place, complex, inverse
template <class TComplexArray1D>
bool IFFT(TComplexArray1D & data, const size_t size, const char *& error_description)
{
    return impl::CFFT<TComplexArray1D,1>::FFT_inplace(data, size, impl::FFT_BACKWARD,
                                                      error_description);
}

template <class TComplexArray2D>
bool IFFT(TComplexArray2D & data, const size_t size1, const size_t size2,
          const char *& error_description)
{
    return impl::CFFT<TComplexArray2D,2>::FFT_inplace(data, size1, size2, impl::FFT_BACKWARD,
                                                      error_description);
}

template <class TComplexArray3D>
bool IFFT(TComplexArray3D & data, const size_t size1, const size_t size2, const size_t size3,
          const char *& error_description)
{
    return impl::CFFT<TComplexArray3D,3>::FFT_inplace(data, size1, size2, size3,
                                                      impl::FFT_BACKWARD,
                                                      error_description);
}

// not-in-place, complex, forward
template <class TComplexArray1D>
bool FFT(const TComplexArray1D & data_in, TComplexArray1D & data_out,
         const size_t size, const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size);
    return impl::CFFT<TComplexArray1D,1>::FFT_inplace(data_out, size, impl::FFT_FORWARD,
                                                      error_description);
}

template <class TComplexArray2D>
bool FFT(const TComplexArray2D & data_in, TComplexArray2D & data_out,
         const size_t size1, const size_t size2, const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size1, size2);
    return impl::CFFT<TComplexArray2D,2>::FFT_inplace(data_out, size1, size2,
                                                      impl::FFT_FORWARD,
                                                      error_description);
}

template <class TComplexArray3D>
bool FFT(const TComplexArray3D & data_in, TComplexArray3D & data_out,
         const size_t size1, const size_t size2, const size_t size3,
         const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size1, size2, size3);
    return impl::CFFT<TComplexArray3D,3>::FFT_inplace(data_out, size1, size2, size3,
                                                      impl::FFT_FORWARD,
                                                      error_description);
}

// not-in-place, complex, inverse
template <class TComplexArray1D>
bool IFFT(const TComplexArray1D & data_in, TComplexArray1D & data_out,
          const size_t size, const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size);
    return impl::CFFT<TComplexArray1D,1>::FFT_inplace(data_out, size, impl::FFT_BACKWARD,
                                                      error_description);
}

template <class TComplexArray2D>
bool IFFT(const TComplexArray2D & data_in, TComplexArray2D & data_out,
          const size_t size1, const size_t size2, const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size1, size2);
    return impl::CFFT<TComplexArray2D,2>::FFT_inplace(data_out, size1, size2,
                                                      impl::FFT_BACKWARD,
                                                      error_description);
}

template <class TComplexArray3D>
bool IFFT(const TComplexArray3D & data_in, TComplexArray3D & data_out,
          const size_t size1, const size_t size2, const size_t size3,
          const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size1, size2, size3);
    return impl::CFFT<TComplexArray3D,3>::FFT_inplace(data_out, size1, size2, size3,
                                                      impl::FFT_BACKWARD,
                                                      error_description);
}

// not-in-place, real, forward
template <class TRealArray1D, class TComplexArray1D>
bool FFT(const TRealArray1D & data_in, TComplexArray1D & data_out,
         const size_t size, const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size);
    return impl::CFFT<TComplexArray1D,1>::FFT_inplace(data_out, size,
                                                      impl::FFT_FORWARD,
                                                      error_description);
}

template <class TRealArray2D, class TComplexArray2D>
bool FFT(const TRealArray2D & data_in, TComplexArray2D & data_out,
         const size_t size1, const size_t size2, const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size1, size2);
    return impl::CFFT<TComplexArray2D,2>::FFT_inplace(data_out, size1, size2,
                                                      impl::FFT_FORWARD,
                                                      error_description);
}

template <class TRealArray3D, class TComplexArray3D>
bool FFT(const TRealArray3D & data_in, TComplexArray3D & data_out,
         const size_t size1, const size_t size2, const size_t size3,
         const char *& error_description)
{
    copy_array::copyArray(data_in, data_out, size1, size2, size3);
    return impl::CFFT<TComplexArray3D,3>::FFT_inplace(data_out, size1, size2, size3,
                                                      impl::FFT_FORWARD,
                                                      error_description);
}

} // simple_fft

#endif // __SIMPLE_FFT__FFT_HPP__


================================================
FILE: include/simple_fft/fft_impl.hpp
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

#ifndef __SIMPLE_FFT__FFT_IMPL_HPP__
#define __SIMPLE_FFT__FFT_IMPL_HPP__

#include "fft_settings.h"
#include "error_handling.hpp"
#include <cstddef>
#include <math.h>
#include <vector>

using std::size_t;

#ifndef M_PI
#define M_PI 3.1415926535897932
#endif

namespace simple_fft {
namespace impl {

// Direction of the transform requested from the implementation routines.
enum FFT_direction
{
    FFT_FORWARD = 0, // forward transform: makeTransform uses the angle -pi
    FFT_BACKWARD     // inverse transform: makeTransform uses the angle +pi and
                     // the 1D FFT_inplace scales the result by 1/size afterwards
};

// Tells whether num is an exact power of two; zero is not one.
// A power of two has exactly one bit set, so clearing its lowest set bit
// with "num & (num - 1)" has to leave nothing.
inline bool isPowerOfTwo(const size_t num)
{
    if (num == 0) {
        return false;
    }

    return (num & (num - 1)) == 0;
}

inline bool checkNumElements(const size_t num_elements, const char *& error_description)
{
    using namespace error_handling;

    if (!isPowerOfTwo(num_elements)) {
        GetErrorDescription(EC_ONE_OF_DIMS_ISNT_POWER_OF_TWO, error_description);
        return false;
    }

    return true;
}

template <class TComplexArray1D>
inline void scaleValues(TComplexArray1D & data, const size_t num_elements)
{
    real_type mult = 1.0 / num_elements;
    int num_elements_signed = static_cast<int>(num_elements);

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
    for(int i = 0; i < num_elements_signed; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
        data[i] *= mult;
#else
        data(i) *= mult;
#endif
    }
}

// Explicit specialization of scaleValues for std::vector<complex_type>.
// The 2D and 3D transforms run their 1D sub-transforms on such vectors no
// matter which element access operator the user's array class provides, so
// this version always uses square brackets. The body duplicates the generic
// one apart from that.
template <>
inline void scaleValues<std::vector<complex_type> >(std::vector<complex_type> & data,
                                                    const size_t num_elements)
{
    const int count = static_cast<int>(num_elements);
    real_type scale = 1.0 / num_elements;

#if !defined(__clang__) && defined(__USE_OPENMP)
#pragma omp parallel for
#endif
    for(int idx = 0; idx < count; ++idx) {
        data[idx] *= scale;
    }
}

// Swaps the elements of data at positions index_from and index_to, using buf
// as the temporary. After the call buf holds the value that was stored at
// index_from before the swap. The element access operator is selected by
// __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR.
template <class TComplexArray1D>
inline void bufferExchangeHelper(TComplexArray1D & data, const size_t index_from,
                                 const size_t index_to, complex_type & buf)
{
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
    buf = data(index_from);
    data(index_from) = data(index_to);
    data(index_to) = buf;
#else
    buf = data[index_from];
    data[index_from] = data[index_to];
    data[index_to] = buf;
#endif
}

// Explicit specialization of bufferExchangeHelper for std::vector<complex_type>.
// The 2D and 3D transforms run their 1D sub-transforms on such vectors no
// matter which element access operator the user's array class provides, so
// this version always uses square brackets. The body duplicates the generic
// one apart from that.
template <>
inline void bufferExchangeHelper<std::vector<complex_type> >(std::vector<complex_type> & data,
                                                             const size_t index_from,
                                                             const size_t index_to,
                                                             complex_type & buf)
{
    // buf keeps the former value of data[index_from] after the swap
    buf = data[index_from];
    data[index_from] = data[index_to];
    data[index_to] = buf;
}

// Reorders the first num_elements elements of data in place into bit-reversed
// index order, which is the element order makeTransform() is run on by
// CFFT<...,1>::FFT_inplace. target_index is maintained as the bit-reversed
// counterpart of the loop index i. The reversal works on the bits below
// num_elements, which presumably requires num_elements to be a power of two
// (the 1D FFT_inplace checks that before calling this function).
template <class TComplexArray1D>
void rearrangeData(TComplexArray1D & data, const size_t num_elements)
{
    // temporary used by bufferExchangeHelper for the swap
    complex_type buf;

    size_t target_index = 0;
    size_t bit_mask;

    for (size_t i = 0; i < num_elements; ++i)
    {
        // Swap only when target_index > i, so that every pair is exchanged
        // exactly once and elements that map to themselves are left alone.
        if (target_index > i)
        {
            bufferExchangeHelper(data, target_index, i, buf);
        }

        // Advance target_index to the bit-reversed value of i + 1: this is
        // an increment that starts at the highest bit and carries downwards.

        // Initialize the bit mask
        bit_mask = num_elements;

        // While bit is 1 (the mask is shifted down before each test, so the
        // first bit tested is num_elements / 2; the loop also stops once
        // the mask has become zero)
        while (target_index & (bit_mask >>= 1)) // bit_mask = bit_mask >> 1
        {
            // Drop bit:
            // & is bitwise AND,
            // ~ is bitwise NOT
            target_index &= ~bit_mask; // target_index = target_index & (~bit_mask)
        }

        // Set the first zero bit that was found (no-op if the mask ran out).
        // | is bitwise OR
        target_index |= bit_mask; // target_index = target_index | bit_mask
    }
}

// Performs one butterfly step on the pair of elements (k, match):
//   product     = data[match] * factor
//   data[match] = data[k] - product
//   data[k]     = data[k] + product
// product is an output parameter that receives the intermediate value. The
// element access operator is selected by
// __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR.
template <class TComplexArray1D>
inline void fftTransformHelper(TComplexArray1D & data, const size_t match,
                               const size_t k, complex_type & product,
                               const complex_type factor)
{
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
    product = data(match) * factor;
    data(match) = data(k) - product;
    data(k) += product;
#else
    product = data[match] * factor;
    data[match] = data[k] - product;
    data[k] += product;
#endif
}

// Explicit specialization of fftTransformHelper for std::vector<complex_type>.
// The 2D and 3D transforms run their 1D sub-transforms on such vectors no
// matter which element access operator the user's array class provides, so
// this version always uses square brackets. The body duplicates the generic
// one apart from that.
template <>
inline void fftTransformHelper<std::vector<complex_type> >(std::vector<complex_type> & data,
                                                           const size_t match,
                                                           const size_t k,
                                                           complex_type & product,
                                                           const complex_type factor)
{
    // butterfly: data[k] +/- data[match] * factor
    product = data[match] * factor;
    data[match] = data[k] - product;
    data[k] += product;
}

// Runs the butterfly passes of the 1D transform over data, in place.
// CFFT<...,1>::FFT_inplace calls rearrangeData() on the data before calling
// this function and applies the 1/size scaling for the backward direction
// afterwards; neither is done here.
//
// fft_direction selects the sign of the angle (-pi for FFT_FORWARD, +pi for
// FFT_BACKWARD). Any other value makes the function return false with
// error_description pointing at the EC_WRONG_FFT_DIRECTION message; otherwise
// it returns true and leaves error_description untouched.
template <class TComplexArray1D>
bool makeTransform(TComplexArray1D & data, const size_t num_elements,
                   const FFT_direction fft_direction, const char *& error_description)
{
    using namespace error_handling;
    using std::sin;

    // signed pi: the sign encodes the direction of the transform
    double local_pi;
    switch(fft_direction)
    {
    case(FFT_FORWARD):
        local_pi = -M_PI;
        break;
    case(FFT_BACKWARD):
        local_pi = M_PI;
        break;
    default:
        GetErrorDescription(EC_WRONG_FFT_DIRECTION, error_description);
        return false;
    }

    // declare variables to cycle the bits of initial signal
    size_t next, match;
    real_type sine;
    real_type delta;
    complex_type mult, factor, product;

    // NOTE: user's complex type should have constructor like
    // "complex(real, imag)", where each of real and imag has
    // real type.

    // cycle for all bit positions of initial signal:
    // i is the distance between the two elements of a pair and doubles
    // on every pass
    for (size_t i = 1; i < num_elements; i <<= 1)
    {
        next = i << 1;  // getting the next bit (stride between pairs sharing a factor)
        delta = local_pi / i;    // angle increasing
        sine = sin(0.5 * delta);    // supplementary sin
        // multiplier for trigonometric recurrence:
        // (-2*sin^2(delta/2), sin(delta)) equals exp(i*delta) - 1
        mult = complex_type(-2.0 * sine * sine, sin(delta));
        factor = 1.0;   // start transform factor

        for (size_t j = 0; j < i; ++j) // iterations through groups
                                       // with different transform factors
        {
            for (size_t k = j; k < num_elements; k += next) // iterations through
                                                            // pairs within group
            {
                match = k + i;
                fftTransformHelper(data, match, k, product, factor);
            }
            // factor *= exp(i*delta), written as factor*(exp(i*delta) - 1) + factor
            factor = mult * factor + factor;
        }
    }

    return true;
}

// Generic template for complex FFT, parameterized by the array type and the
// number of dimensions. It is intentionally empty: FFT_inplace is provided
// only by the specializations for particular NumDims values that follow, so
// instantiating CFFT with any other NumDims gives a type without FFT_inplace.
template <class TComplexArray, int NumDims>
struct CFFT
{};

// 1D FFT:
template <class TComplexArray1D>
struct CFFT<TComplexArray1D,1>
{
    // Transforms the first `size` elements of data in place.
    // Steps: validate the size (checkNumElements), reorder the elements
    // (rearrangeData), run the butterfly passes (makeTransform) and, for
    // FFT_BACKWARD only, scale the result (scaleValues).
    // Returns false when the size check or makeTransform fails;
    // error_description is set by whichever of them failed. Note that a
    // makeTransform failure happens after the data has been reordered.
    static bool FFT_inplace(TComplexArray1D & data, const size_t size,
                            const FFT_direction fft_direction,
                            const char *& error_description)
    {
        if (checkNumElements(size, error_description)) {
            rearrangeData(data, size);

            if (makeTransform(data, size, fft_direction, error_description)) {
                if (fft_direction == FFT_BACKWARD) {
                    scaleValues(data, size);
                }
                return true;
            }
        }

        return false;
    }
};

// 2D FFT
template <class TComplexArray2D>
struct CFFT<TComplexArray2D,2>
{
    static bool FFT_inplace(TComplexArray2D & data, const size_t size1, const size_t size2,
                            const FFT_direction fft_direction, const char *& error_description)
    {
        int n_rows = static_cast<int>(size1);
        int n_cols = static_cast<int>(size2);

        // fft for columns
        std::vector<complex_type> subarray(n_rows); // each column has n_rows elements

        for(int j = 0; j < n_cols; ++j)
        {
#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
            for(int i = 0; i < n_rows; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                subarray[i] = data[i][j];
#else
                subarray[i] = data(i,j);
#endif
            }

            if(!CFFT<std::vector<complex_type>,1>::FFT_inplace(subarray, size1,
                                                               fft_direction,
                                                               error_description))
            {
                return false;
            }

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
            for(int i = 0; i < n_rows; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                data[i][j] = subarray[i];
#else
                data(i,j) = subarray[i];
#endif
            }
        }

        // fft for rows
        subarray.resize(n_cols); // each row has n_cols elements

        for(int i = 0; i < n_rows; ++i)
        {
#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
            for(int j = 0; j < n_cols; ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                subarray[j] = data[i][j];
#else
                subarray[j] = data(i,j);
#endif
            }

            if(!CFFT<std::vector<complex_type>,1>::FFT_inplace(subarray, size2,
                                                               fft_direction,
                                                               error_description))
            {
                return false;
            }

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
            for(int j = 0; j < n_cols; ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                data[i][j] = subarray[j];
#else
                data(i,j) = subarray[j];
#endif
            }
        }

        return true;
    }
};

// 3D FFT
template <class TComplexArray3D>
struct CFFT<TComplexArray3D,3>
{
    static bool FFT_inplace(TComplexArray3D & data, const size_t size1, const size_t size2,
                            const size_t size3, const FFT_direction fft_direction,
                            const char *& error_description)
    {
        int n_rows  = static_cast<int>(size1);
        int n_cols  = static_cast<int>(size2);
        int n_depth = static_cast<int>(size3);

        std::vector<complex_type> subarray(n_rows); // for fft for columns: each column has n_rows elements

        for(int k = 0; k < n_depth; ++k) // for all depth layers
        {
            // fft for columns
            for(int j = 0; j < n_cols; ++j)
            {
#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
                for(int i = 0; i < n_rows; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    subarray[i] = data[i][j][k];
#else
                    subarray[i] = data(i,j,k);
#endif
                }

                if(!CFFT<std::vector<complex_type>,1>::FFT_inplace(subarray, size1,
                                                                   fft_direction,
                                                                   error_description))
                {
                    return false;
                }

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
                for(int i = 0; i < n_rows; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    data[i][j][k] = subarray[i];
#else
                    data(i,j,k) = subarray[i];
#endif
                }
            }
        }

        subarray.resize(n_cols); // for fft for rows: each row has n_cols elements

        for(int k = 0; k < n_depth; ++k) // for all depth layers
        {
            // fft for rows
            for(int i = 0; i < n_rows; ++i)
            {
#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
                for(int j = 0; j < n_cols; ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    subarray[j] = data[i][j][k];
#else
                    subarray[j] = data(i,j,k);
#endif
                }

                if(!CFFT<std::vector<complex_type>,1>::FFT_inplace(subarray, size2,
                                                                   fft_direction,
                                                                   error_description))
                {
                    return false;
                }

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
                for(int j = 0; j < n_cols; ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    data[i][j][k] = subarray[j];
#else
                    data(i,j,k) = subarray[j];
#endif
                }
            }
        }

        // fft for depth
        subarray.resize(n_depth); // each depth strip contains n_depth elements

        for(int i = 0; i < n_rows; ++i) // for all rows layers
        {
            for(int j = 0; j < n_cols; ++j) // for all cols layers
            {
#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
                for(int k = 0; k < n_depth; ++k) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    subarray[k] = data[i][j][k];
#else
                    subarray[k] = data(i,j,k);
#endif
                }

                if(!CFFT<std::vector<complex_type>,1>::FFT_inplace(subarray, size3,
                                                                   fft_direction,
                                                                   error_description))
                {
                    return false;
                }

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
                for(int k = 0; k < n_depth; ++k) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    data[i][j][k] = subarray[k];
#else
                    data(i,j,k) = subarray[k];
#endif
                }
            }
        }

        return true;
    }
};

} // namespace impl
} // namespace simple_fft

#endif // __SIMPLE_FFT__FFT_IMPL_HPP__


================================================
FILE: include/simple_fft/fft_settings.h
================================================
/**
 * Copyright (c) 2013-2020 Dmitry Ivanov
 *
 * This file is a part of Simple-FFT project and is distributed under the terms
 * of MIT license: https://opensource.org/licenses/MIT
 */

// In this file you can alter some settings of the library:
// 1) Specify the desired real and complex types by typedef'ing real_type and complex_type.
//    By default real_type is double and complex_type is std::complex<real_type>.
// 2) If the array class uses square brackets for element access operator, define
//    the macro __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR

#ifndef __SIMPLE_FFT__FFT_SETTINGS_H__
#define __SIMPLE_FFT__FFT_SETTINGS_H__

#include <complex>

// Scalar type used for every real-valued quantity; change this typedef to
// switch the floating-point type.
typedef double real_type;
// Complex sample type, built on top of real_type.
typedef std::complex<real_type> complex_type;

//#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
//#define __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
//#endif

#endif // __SIMPLE_FFT__FFT_SETTINGS_H__


================================================
FILE: unit-tests/test_fft.cpp
================================================
#include "test_fft.h"
#include "../include/simple_fft/fft_settings.h"

namespace simple_fft {
namespace fft_test {

// Fills `grid` with `n_grid_points` equally spaced values running from
// `grid_min` (first element) to `grid_max` (last element).
//
// Degenerate sizes are handled explicitly: an empty grid stays empty and a
// single-point grid holds just `grid_min`. Without this the interval count
// (n_grid_points - 1) would be zero for one point, making the only element
// 0/0 = NaN, and would wrap around as an unsigned value for zero points.
//
// A negative `n_grid_points` is not handled here; it is passed to
// std::vector::resize as before.
void makeGrid1D(const real_type grid_min, const real_type grid_max,
                const int n_grid_points, std::vector<real_type> & grid)
{
    grid.resize(n_grid_points);

    if (n_grid_points < 2) {
        if (n_grid_points == 1) {
            grid[0] = grid_min;
        }
        return;
    }

    const real_type grid_length = grid_max - grid_min;
    const int n_intervals = n_grid_points - 1; // >= 1 from here on

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
    for(int i = 0; i < n_grid_points; ++i) {
        grid[i] = grid_min + grid_length * i / n_intervals;
    }
}

void makeGridsForPulse(std::vector<real_type> & t, std::vector<real_type> & x)
{
    using namespace pulse_params;

    // Time grid parameters
    const real_type tmin_norm = -8 * t0 * w0;   // normalized min time value
    const real_type tmax_norm =  8 * t0 * w0;   // normalized max time value

    // Transverse spatial x grid parameters
    const real_type xmin = -50 * x0;
    const real_type xmax =  50 * x0;

    makeGrid1D(tmin_norm, tmax_norm, nt, t);
    makeGrid1D(xmin, xmax, nx, x);
}

void makeGridsForPulse3D(std::vector<real_type> & t, std::vector<real_type> & x,
                         std::vector<real_type> & y)
{
    makeGridsForPulse(t, x);

    using namespace pulse_params;

    // Transverse spatial y grid parameters
    const real_type ymin = -50 * y0;
    const real_type ymax =  50 * y0;

    makeGrid1D(ymin, ymax, ny, y);
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_fft.h
================================================
/*
 * 1D, 2D and 3D electromagnetic fields of femtosecond laser pulses and
 * the respective spectra are considered for testing of FFT functions. This
 * file and test_fft.hpp contain respectively declaration and implementation of
 * common structs and functions used to test FFT with particular types of
 * multidimensional arrays
 */

#ifndef __SIMPLE_FFT__UNIT_TESTS__TEST_FFT_H__
#define __SIMPLE_FFT__UNIT_TESTS__TEST_FFT_H__

#include "../include/simple_fft/fft_settings.h"
#include <math.h>
#include <vector>

namespace simple_fft {
namespace fft_test {

// Status codes returned by the test routines declared in this header
// (testFFT returns one of these).
enum
{
    SUCCESS = 0,
    FAILURE = 1
};

namespace pulse_params
{
// Fallback for toolchains whose <math.h> does not provide M_PI
#ifndef M_PI
#define M_PI 3.1415926535897932
#endif

    // Physical parameters of the femtosecond laser pulse used as test signal
    const real_type c = 3e10;            // speed of light in vacuum, [cm/sec]
    const real_type t0 = 4e-15;          // pulse duration, [sec]
    const real_type lambda0 = 780e-7;    // central wavelength, [cm]
    const real_type x0 = 20 * lambda0;   // transverse size along x, [cm]
    const real_type y0 = 20 * lambda0;   // transverse size along y, [cm]
    // Central frequency computed as 2*pi*c/lambda0; the time grid is
    // normalized by multiplying times with this value.
    const real_type w0 = 2.0 * M_PI * c / lambda0;

    // Number of grid points along t, x and y
    const size_t nt = 128;
    const size_t nx = 128;
    const size_t ny = 64;

} // namespace pulse_params

// Fills `grid` with `n_grid_points` equally spaced values from `grid_min`
// to `grid_max` (defined in test_fft.cpp).
void makeGrid1D(const real_type grid_min, const real_type grid_max,
                const int n_grid_points, std::vector<real_type> & grid);

// Builds the normalized time grid `t` and the transverse grid `x` from the
// constants in pulse_params (defined in test_fft.cpp).
void makeGridsForPulse(std::vector<real_type> & t, std::vector<real_type> & x);

// Same as makeGridsForPulse, plus the second transverse grid `y`
// (defined in test_fft.cpp).
void makeGridsForPulse3D(std::vector<real_type> & t, std::vector<real_type> & x,
                         std::vector<real_type> & y);

// Fills 1D and 2D arrays with the initial test pulse. The boolean parameter
// selects the partial specialization in test_fft.hpp: `true` stores plain
// real values, `false` stores complex_type values with zero imaginary part.
template <class TArray1D, class TArray2D, bool real_initial_signal>
struct CMakeInitialPulses
{
    static void makeInitialPulses(TArray1D & pulse1D, TArray2D & pulse2D);
};

// 3D counterpart of CMakeInitialPulses: the specializations in test_fft.hpp
// fill the 1D and 2D pulses through CMakeInitialPulses and then the 3D one.
template <class TArray1D, class TArray2D, class TArray3D, bool real_initial_signal>
struct CMakeInitialPulses3D
{
    static void makeInitialPulses(TArray1D & pulse1D, TArray2D & pulse2D,
                                  TArray3D & pulse3D);
};

// Runs the 1D and 2D round-trip tests (forward FFT, Parseval check, inverse
// FFT, energy conservation check, equality check). testFFT returns SUCCESS
// or FAILURE; `t` and `x` supply the grid sizes. Defined in test_fft.hpp.
template <class TField1D, class TComplexArray1D, class TField2D, class TComplexArray2D>
struct CTestFFT
{
    static int testFFT(const TField1D & initial_field_1D,
                       const TField2D & initial_field_2D,
                       TComplexArray1D & spectrum_1D,
                       TComplexArray1D & restored_field_1D,
                       TComplexArray2D & spectrum_2D,
                       TComplexArray2D & restored_field_2D,
                       const std::vector<real_type> & t,
                       const std::vector<real_type> & x);
};

// 3D counterpart of CTestFFT: its testFFT (test_fft.hpp) first runs
// CTestFFT::testFFT for the 1D/2D arrays and then tests the 3D transforms.
template <class TField1D, class TComplexArray1D, class TField2D, class TComplexArray2D,
          class TField3D, class TComplexArray3D>
struct CTestFFT3D
{
    static int testFFT(const TField1D & initial_field_1D,
                       const TField2D & initial_field_2D,
                       const TField3D & initial_field_3D,
                       TComplexArray1D & spectrum_1D,
                       TComplexArray1D & restored_field_1D,
                       TComplexArray2D & spectrum_2D,
                       TComplexArray2D & restored_field_2D,
                       TComplexArray3D & spectrum_3D,
                       TComplexArray3D & restored_field_3D,
                       const std::vector<real_type> & t,
                       const std::vector<real_type> & x,
                       const std::vector<real_type> & y);
};

// Shared driver for the per-library 1D/2D tests.
// NOTE(review): the definition is not in this header; judging by the
// parameter names it takes real and complex input fields (E*), spectra (G*)
// and restored fields plus the t/x grids - confirm against the definition.
template <class TRealArray1D, class TComplexArray1D, class TRealArray2D, class TComplexArray2D>
bool commonPartsForTests(TRealArray1D & E1_real, TRealArray2D & E2_real,
                         TComplexArray1D & E1_complex, TComplexArray2D & E2_complex,
                         TComplexArray1D & G1, TComplexArray2D & G2,
                         TComplexArray1D & E1_restored, TComplexArray2D & E2_restored,
                         const std::vector<real_type> & t, const std::vector<real_type> & x);

// Shared driver for the per-library 1D/2D/3D tests.
// NOTE(review): the definition is not in this header; parameter naming
// mirrors commonPartsForTests with an extra 3D array of each kind and the
// y grid - confirm against the definition.
template <class TRealArray1D, class TComplexArray1D, class TRealArray2D,
          class TComplexArray2D, class TRealArray3D, class TComplexArray3D>
bool commonPartsForTests3D(TRealArray1D & E1_real, TRealArray2D & E2_real, TRealArray3D & E3_real,
                           TComplexArray1D & E1_complex, TComplexArray2D & E2_complex,
                           TComplexArray3D & E3_complex, TComplexArray1D & G1,
                           TComplexArray2D & G2, TComplexArray3D & G3,
                           TComplexArray1D & E1_restored, TComplexArray2D & E2_restored,
                           TComplexArray3D & E3_restored, const std::vector<real_type> & t,
                           const std::vector<real_type> & x, const std::vector<real_type> & y);

// Declarations for functions used for FFT tests with different array types.
// Each returns an int status. Tests for optional third-party libraries are
// only declared when the matching HAS_* macro is defined.
// NOTE(review): the HAS_* macros are presumably set by the build system -
// confirm in CMakeLists.txt.

// Native C++ arrays
int testNativeArraysFFT();
// std::vector based arrays
int testStdVectorsFFT();

// Boost multiarray and/or matrix from ublas:
#ifdef HAS_BOOST_PACKAGE
#ifdef HAS_BOOST_MULTI_ARRAY
int testBoostMultiArray();
#endif // HAS_BOOST_MULTI_ARRAY

#ifdef HAS_BOOST_UBLAS
int testBoostUblas(); // only vector and matrix
#endif // HAS_BOOST_UBLAS

#endif // HAS_BOOST_PACKAGE

// Eigen 3x library
#ifdef HAS_EIGEN
int testEigen();
#endif

// marray
#ifdef HAS_MARRAY
int testMarray();
#endif

// Armadillo C++
#ifdef HAS_ARMADILLO
int testArmadillo();
#endif

// Blitz++
#ifdef HAS_BLITZ
int testBlitz();
#endif

// STLSoft
#ifdef HAS_STLSOFT
int testStlSoft();
#endif

} // namespace fft_test
} // namespace simple_fft

#endif // __SIMPLE_FFT__UNIT_TESTS__TEST_FFT_H__

#include "test_fft.hpp"


================================================
FILE: unit-tests/test_fft.hpp
================================================
#ifndef __SIMPLE_FFT__UNIT_TESTS__TEST_FFT_HPP__
#define __SIMPLE_FFT__UNIT_TESTS__TEST_FFT_HPP__

#include "test_fft.h"
#include "../include/simple_fft/fft.h"
#include "../include/simple_fft/check_fft.hpp"
#include <iostream>
#include <iomanip>

namespace simple_fft {
namespace fft_test {

// Make initial real_type pulses - explicit template specialization of struct
template  <class TArray1D, class TArray2D>
struct CMakeInitialPulses<TArray1D,TArray2D,true>
{
    static void makeInitialPulses(TArray1D & pulse1D, TArray2D & pulse2D)
    {
        using namespace pulse_params;

        std::vector<real_type> t, x;
        makeGridsForPulse(t, x);

        const int nt = t.size();
        const int nx = x.size();

        std::cout << "FFT test: creating real 1D pulse." << std::endl;

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
        for(int i = 0; i < nt; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            pulse1D[i] = std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) * sin(t[i]);
#else
            pulse1D(i) = std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) * sin(t[i]);
#endif
        }

        std::cout << "Done." << std::endl;

        std::cout << "FFT test: creating real 2D pulse." << std::endl;

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
        for(int i = 0; i < nt; ++i) {
            for(int j = 0; j < nx; ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                pulse2D[i][j] = std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) * sin(t[i]);
#else
                pulse2D(i,j) = std::exp(-2 * std::pow(x[j] / x0, 2)) *
                               std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) * sin(t[i]);
#endif
            }
        }

        std::cout << "Done." << std::endl;
    }
};

template  <class TArray1D, class TArray2D>
struct CMakeInitialPulses<TArray1D,TArray2D,false>
{
    static void makeInitialPulses(TArray1D & pulse1D, TArray2D & pulse2D)
    {
        using namespace pulse_params;

        std::vector<real_type> t, x;
        makeGridsForPulse(t, x);

        const int nt = t.size();
        const int nx = x.size();

        std::cout << "FFT test: creating real 1D pulse." << std::endl;

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
        for(int i = 0; i < nt; ++i) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
            pulse1D[i] = complex_type(std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) * sin(t[i]), 0.0);
#else
            pulse1D(i) = complex_type(std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) * sin(t[i]), 0.0);
#endif
        }

        std::cout << "Done." << std::endl;

        std::cout << "FFT test: creating real 2D pulse." << std::endl;

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
        for(int i = 0; i < nt; ++i) {
            for(int j = 0; j < nx; ++j) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                pulse2D[i][j] = complex_type(std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                             std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) *
                                             sin(t[i]), 0.0);
#else
                pulse2D(i,j) = complex_type(std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                            std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) *
                                            sin(t[i]), 0.0);
#endif
            }
        }

        std::cout << "Done." << std::endl;
    }
};

// Make initial real_type pulses - explicit template specialization of struct
template <class TArray1D, class TArray2D, class TArray3D>
struct CMakeInitialPulses3D<TArray1D,TArray2D,TArray3D,true>
{
    static void makeInitialPulses(TArray1D & pulse1D, TArray2D & pulse2D, TArray3D & pulse3D)
    {
        using namespace pulse_params;

        std::vector<real_type> t, x, y;
        makeGridsForPulse3D(t, x, y);

        const int nt = t.size();
        const int nx = x.size();
        const int ny = y.size();

        CMakeInitialPulses<TArray1D,TArray2D,true>::makeInitialPulses(pulse1D, pulse2D);

        std::cout << "FFT test: creating real 3D pulse." << std::endl;

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
        for(int i = 0; i < nt; ++i) {
            for(int j = 0; j < nx; ++j) {
                for(int k = 0; k < ny; ++k) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    pulse3D[i][j][k] = std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                       std::exp(-2 * std::pow(y[k] / y0, 2)) *
                                       std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) *
                                       sin(t[i]);
#else
                    pulse3D(i,j,k) = std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                     std::exp(-2 * std::pow(y[k] / y0, 2)) *
                                     std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) *
                                     sin(t[i]);
#endif
                }
            }
        }

        std::cout << "Done." << std::endl;
    }
};

// Make initial complex_type pulses - explicit template specialization of struct
template <class TArray1D, class TArray2D, class TArray3D>
struct CMakeInitialPulses3D<TArray1D,TArray2D,TArray3D,false>
{
    static void makeInitialPulses(TArray1D & pulse1D, TArray2D & pulse2D, TArray3D & pulse3D)
    {
        using namespace pulse_params;

        std::vector<real_type> t, x, y;
        makeGridsForPulse3D(t, x, y);

        const int nt = t.size();
        const int nx = x.size();
        const int ny = y.size();

        CMakeInitialPulses<TArray1D,TArray2D,false>::makeInitialPulses(pulse1D, pulse2D);

        std::cout << "FFT test: creating real 3D pulse." << std::endl;

#ifndef __clang__
#ifdef __USE_OPENMP
#pragma omp parallel for
#endif
#endif
        for(int i = 0; i < nt; ++i) {
            for(int j = 0; j < nx; ++j) {
                for(int k = 0; k < ny; ++k) {
#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
                    pulse3D[i][j][k] = complex_type(std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                                    std::exp(-2 * std::pow(y[k] / y0, 2)) *
                                                    std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) *
                                                    sin(t[i]), 0.0);
#else
                    pulse3D(i,j,k) = complex_type(std::exp(-2 * std::pow(x[j] / x0, 2)) *
                                                  std::exp(-2 * std::pow(y[k] / y0, 2)) *
                                                  std::exp(-2 * std::pow(t[i] / (w0 * t0), 2)) *
                                                  sin(t[i]), 0.0);
#endif
                }
            }
        }

        std::cout << "Done." << std::endl;
    }
};

template <class TField1D, class TComplexArray1D, class TField2D, class TComplexArray2D>
int CTestFFT<TField1D,TComplexArray1D,TField2D,TComplexArray2D>::testFFT(const TField1D & initial_field_1D,
                                                                         const TField2D & initial_field_2D,
                                                                         TComplexArray1D & spectrum_1D,
                                                                         TComplexArray1D & restored_field_1D,
                                                                         TComplexArray2D & spectrum_2D,
                                                                         TComplexArray2D & restored_field_2D,
                                                                         const std::vector<real_type> & t,
                                                                         const std::vector<real_type> & x)
{
#ifndef M_PI
#define M_PI 3.1415926535897932
#endif

    bool res;
    const char * error_description = 0;
    const real_type relative_tolerance = 1e-4;
    real_type discrepancy;

    std::cout << std::setprecision(20);

    int grid_size_t = static_cast<int>(t.size());
    int grid_size_x = static_cast<int>(x.size());

    // Testing 1D complex FFT and IFFT
    std::cout << "FFT test: testing 1D forward FFT." << std::endl;

    res = simple_fft::FFT(initial_field_1D, spectrum_1D, static_cast<size_t>(grid_size_t),
                          error_description);
    if (!res) {
        std::cout << "FFT test: 1D forward FFT returned with error! Error description: "
                  << error_description << std::endl;
        return FAILURE;
    }

    std::cout << "Done." << std::endl;
    std::cout << "FFT test: checking 1D Parseval theorem with relative tolerance = "
              << relative_tolerance <<  std::endl;

    res = simple_fft::check_fft::checkParsevalTheorem(initial_field_1D, spectrum_1D,
                                                      static_cast<size_t>(grid_size_t),
                                                      relative_tolerance, discrepancy,
                                                      error_description);
    if (!res) {
        std::cout << "FFT test: checking 1D Parseval theorem returned with error! "
                  << "Error description: " << error_description << ", discrepancy = "
                  << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking 1D Parseval theorem completed successfully, "
              << "discrepancy = " << discrepancy << std::endl;

    std::cout << "FFT test: testing 1D inverse FFT." << std::endl;

    res = simple_fft::IFFT(spectrum_1D, restored_field_1D, static_cast<size_t>(grid_size_t),
                           error_description);
    if (!res) {
        std::cout << "FFT test: 1D inverse FFT returned with error! "
                  << "Error description: " << error_description << std::endl;
        return FAILURE;
    }

    std::cout << "Done." << std::endl;
    std::cout << "FFT test: checking 1D energy conservation law with relative tolerance = "
              << relative_tolerance << std::endl;

    res = simple_fft::check_fft::checkEnergyConservation(initial_field_1D, restored_field_1D,
                                                         static_cast<size_t>(grid_size_t),
                                                         relative_tolerance, discrepancy,
                                                         error_description);
    if (!res) {
        std::cout << "FFT test: checking 1D energy conservation law returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking 1D energy conservation law completed successfully, "
              << "discrepancy = " << discrepancy << std::endl;

    std::cout << "FFT test: checking equality of 1D arrays after FFT and IFFT with "
              << "relative tolerance = " << relative_tolerance << std::endl;

    discrepancy = 0.0;
    res = simple_fft::check_fft::checkEquality(initial_field_1D, restored_field_1D,
                                               static_cast<size_t>(grid_size_t),
                                               relative_tolerance, discrepancy,
                                               error_description);
    if (!res) {
        std::cout << "FFT test: checking 1D equality test returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking equality of 1D arrays after FFT and IFFT completed "
              << "successfully, discrepancy = " << discrepancy << std::endl;

    // Testing 2D FFT and IFFT
    std::cout << "Testing 2D forward FFT." << std::endl;

    res = simple_fft::FFT(initial_field_2D, spectrum_2D, static_cast<size_t>(grid_size_t),
                          static_cast<size_t>(grid_size_x), error_description);
    if (!res) {
        std::cout << "FFT test: forward 2D FFT returned with error! Error description: "
                  << error_description << std::endl;
        return FAILURE;
    }

    std::cout << "Done." << std::endl;
    std::cout << "FFT test: checking 2D Parseval theorem with relative tolerance = "
              << relative_tolerance << std::endl;

    res = simple_fft::check_fft::checkParsevalTheorem(initial_field_2D, spectrum_2D,
                                                      static_cast<size_t>(grid_size_t),
                                                      static_cast<size_t>(grid_size_x),
                                                      relative_tolerance, discrepancy,
                                                      error_description);
    if (!res) {
        std::cout << "FFT test: checking 2D Parseval theorem returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking 2D Parseval theorem completed successfully, "
              << "discrepancy = " << discrepancy << std::endl;
    std::cout << "FFT test: testing inverse 2D FFT." << std::endl;

    res = simple_fft::IFFT(spectrum_2D, restored_field_2D, static_cast<size_t>(grid_size_t),
                           static_cast<size_t>(grid_size_x), error_description);
    if (!res) {
        std::cout << "FFT test: inverse 2D FFT returned with error! Error description: "
                  << error_description << std::endl;
        return FAILURE;
    }

    std::cout << "Done." << std::endl;
    std::cout << "FFT test: checking 2D energy conservation law with relative tolerance = "
              << relative_tolerance << std::endl;

    res = simple_fft::check_fft::checkEnergyConservation(initial_field_2D, restored_field_2D,
                                                         static_cast<size_t>(grid_size_t),
                                                         static_cast<size_t>(grid_size_x),
                                                         relative_tolerance, discrepancy,
                                                         error_description);
    if (!res) {
        std::cout << "FFT test: checking 2D energy conservation law returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking 2D energy conservation law completed successfully, "
                  << "discrepancy = " << discrepancy << std::endl;

    std::cout << "FFT test: checking equality of 2D arrays after FFT and IFFT with "
              << "relative tolerance = " << relative_tolerance << std::endl;

    discrepancy = 0.0;
    res = simple_fft::check_fft::checkEquality(initial_field_2D, restored_field_2D,
                                               static_cast<size_t>(grid_size_t),
                                               static_cast<size_t>(grid_size_x),
                                               relative_tolerance, discrepancy,
                                               error_description);
    if (!res) {
        std::cout << "FFT test: checking 2D equality test returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking equality of 2D arrays after FFT and IFFT completed "
              << "successfully, discrepancy = " << discrepancy << std::endl;

    std::cout << "FFT test: 1D and 2D FFT and IFFT tests completed successfully!"
              << std::endl;

    return SUCCESS;
}

// Runs the complete FFT self-test for 1D, 2D and 3D data.
//
// The 1D and 2D parts are delegated to CTestFFT::testFFT; if they fail, their
// result code is returned unchanged. The 3D part then performs, in order:
//   1) forward FFT of initial_field_3D into spectrum_3D,
//   2) Parseval theorem check between the field and its spectrum,
//   3) inverse FFT of spectrum_3D into restored_field_3D,
//   4) energy conservation check between initial and restored fields,
//   5) element-wise equality check between initial and restored fields.
//
// The grid sizes are taken from the sizes of t, x and y. Progress and error
// details are written to std::cout.
//
// Returns SUCCESS when every step passes and FAILURE as soon as any 3D step
// fails.
template <class TField1D, class TComplexArray1D, class TField2D, class TComplexArray2D,
          class TField3D, class TComplexArray3D>
int CTestFFT3D<TField1D,TComplexArray1D,TField2D,TComplexArray2D,TField3D,
               TComplexArray3D>::testFFT(const TField1D & initial_field_1D,
                                         const TField2D & initial_field_2D,
                                         const TField3D & initial_field_3D,
                                         TComplexArray1D & spectrum_1D,
                                         TComplexArray1D & restored_field_1D,
                                         TComplexArray2D & spectrum_2D,
                                         TComplexArray2D & restored_field_2D,
                                         TComplexArray3D & spectrum_3D,
                                         TComplexArray3D & restored_field_3D,
                                         const std::vector<real_type> & t,
                                         const std::vector<real_type> & x,
                                         const std::vector<real_type> & y)
{
// Kept for code following this definition that may rely on the fallback.
#ifndef M_PI
#define M_PI 3.1415926535897932
#endif
    const char * error_description = 0;
    const real_type relative_tolerance = 1e-4;
    // Initialised so that it is never printed with an indeterminate value.
    real_type discrepancy = 0.0;

    std::cout << std::setprecision(20);

    const size_t grid_size_t = t.size();
    const size_t grid_size_x = x.size();
    const size_t grid_size_y = y.size();

    // Test 1D and 2D FFT and IFFT
    const int res2D =
        CTestFFT<TField1D,TComplexArray1D,TField2D,TComplexArray2D>::testFFT(
            initial_field_1D, initial_field_2D, spectrum_1D, restored_field_1D,
            spectrum_2D, restored_field_2D, t, x);
    if (res2D != SUCCESS) {
        return res2D;
    }

    // Test 3D FFT and IFFT
    std::cout << "Testing 3D forward FFT." << std::endl;

    bool res = simple_fft::FFT(initial_field_3D, spectrum_3D, grid_size_t,
                               grid_size_x, grid_size_y, error_description);
    if (!res) {
        std::cout << "FFT test: forward 3D FFT returned with error! Error description: "
                  << error_description << std::endl;
        // Must be FAILURE: returning the bool itself would yield 0 here.
        return FAILURE;
    }

    std::cout << "Done." << std::endl;
    std::cout << "FFT test: checking 3D Parseval theorem with relative tolerance = "
              << relative_tolerance << std::endl;

    res = simple_fft::check_fft::checkParsevalTheorem(initial_field_3D, spectrum_3D,
                                                      grid_size_t, grid_size_x,
                                                      grid_size_y, relative_tolerance,
                                                      discrepancy, error_description);
    if (!res) {
        std::cout << "FFT test: checking 3D Parseval theorem returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking 3D Parseval theorem completed successfully, "
              << "discrepancy = " << discrepancy << std::endl;
    std::cout << "FFT test: testing inverse 3D FFT." << std::endl;

    res = simple_fft::IFFT(spectrum_3D, restored_field_3D, grid_size_t,
                           grid_size_x, grid_size_y, error_description);
    if (!res) {
        std::cout << "FFT test: inverse 3D FFT returned with error! "
                  << "Error description: " << error_description << std::endl;
        return FAILURE;
    }

    std::cout << "Done." << std::endl;
    std::cout << "FFT test: checking 3D energy conservation law wih relative tolerance = "
              << relative_tolerance << std::endl;

    res = simple_fft::check_fft::checkEnergyConservation(initial_field_3D,
                                                         restored_field_3D,
                                                         grid_size_t, grid_size_x,
                                                         grid_size_y, relative_tolerance,
                                                         discrepancy, error_description);
    if (!res) {
        // Names the dimension and reports the discrepancy, like the 2D check.
        std::cout << "FFT test: checking 3D energy conservation law returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking 3D energy conservation law completed successfully, "
              << "discrepancy = " << discrepancy << std::endl;

    std::cout << "FFT test: checking equality of 3D arrays after FFT and IFFT with "
              << "relative tolerance = " << relative_tolerance << std::endl;

    discrepancy = 0.0;
    res = simple_fft::check_fft::checkEquality(initial_field_3D, restored_field_3D,
                                               grid_size_t, grid_size_x, grid_size_y,
                                               relative_tolerance, discrepancy,
                                               error_description);
    if (!res) {
        std::cout << "FFT test: checking 3D equality test returned with error! "
                  << "Error description: " << error_description
                  << ", discrepancy = " << discrepancy << std::endl;
        return FAILURE;
    }

    std::cout << "FFT test: checking equality of 3D arrays after FFT and IFFT completed "
              << "successfully, discrepancy = " << discrepancy << std::endl;

    std::cout << "FFT test: 1D, 2D and 3D FFT and IFFT tests completed successfully!"
              << std::endl;

    return SUCCESS;
}

template <class TRealArray1D, class TComplexArray1D, class TRealArray2D, class TComplexArray2D>
bool commonPartsForTests(TRealArray1D & E1_real, TRealArray2D & E2_real,
                         TComplexArray1D & E1_complex, TComplexArray2D & E2_complex,
                         TComplexArray1D & G1, TComplexArray2D & G2,
                         TComplexArray1D & E1_restored, TComplexArray2D & E2_restored,
                         const std::vector<real_type> & t, const std::vector<real_type> & x)
{
    // Make pulses
    CMakeInitialPulses<TRealArray1D,TRealArray2D,true>::makeInitialPulses(E1_real, E2_real);
    CMakeInitialPulses<TComplexArray1D,TComplexArray2D,false>::makeInitialPulses(E1_complex,
                                                                                 E2_complex);

    // call FFT tests
    // 1) with real initial signals
    int res = CTestFFT<TRealArray1D,TComplexArray1D,
                       TRealArray2D,TComplexArray2D>::testFFT(E1_real, E2_real,
                                                              G1, E1_restored,
                                                              G2, E2_restored,
                                                              t, x);
    if (res != SUCCESS) {
        return false;
    }
    else {
        std::cout << "FFT tests for real initial signal completed successfully!"
                  << std::endl;
    }

    // 2) with complex initial signals
    res = CTestFFT<TComplexArray1D,TComplexArray1D,
                   TComplexArray2D,TComplexArray2D>::testFFT(E1_complex, E2_complex,
                                                             G1, E1_restored,
                                                             G2, E2_restored,
                                                             t, x);
    if (res != SUCCESS) {
        return false;
    }
    else {
        std::cout << "FFT tests for complex initial signal completed successfully!"
                  << std::endl;
    }

    return true;
}

template <class TRealArray1D, class TComplexArray1D, class TRealArray2D,
          class TComplexArray2D, class TRealArray3D, class TComplexArray3D>
bool commonPartsForTests3D(TRealArray1D & E1_real, TRealArray2D & E2_real,
                           TRealArray3D & E3_real, TComplexArray1D & E1_complex,
                           TComplexArray2D & E2_complex, TComplexArray3D & E3_complex,
                           TComplexArray1D & G1, TComplexArray2D & G2, TComplexArray3D & G3,
                           TComplexArray1D & E1_restored, TComplexArray2D & E2_restored,
                           TComplexArray3D & E3_restored, const std::vector<real_type> & t,
                           const std::vector<real_type> & x, const std::vector<real_type> & y)
{
    // Make pulses
    CMakeInitialPulses3D<TRealArray1D,TRealArray2D,
                         TRealArray3D,true>::makeInitialPulses(E1_real, E2_real, E3_real);
    CMakeInitialPulses3D<TComplexArray1D,TComplexArray2D,
                         TComplexArray3D,false>::makeInitialPulses(E1_complex,
                                                                   E2_complex,
                                                                   E3_complex);

    // Call FFT tests
    // 1) With real initial signals
    int res = CTestFFT3D<TRealArray1D,TComplexArray1D,
                         TRealArray2D,TComplexArray2D,
                         TRealArray3D,TComplexArray3D>::testFFT(E1_real, E2_real,
                                                                E3_real, G1,
                                                                E1_restored, G2,
                                                                E2_restored, G3,
                                                                E3_restored, t, x, y);
    if (res != SUCCESS) {
        return false;
    }
    else {
        std::cout << "FFT tests for real initial signal completed successfully!"
                  << std::endl;
    }
    // 2) With complex initial signals
    res = CTestFFT3D<TComplexArray1D,TComplexArray1D,
                     TComplexArray2D,TComplexArray2D,
                     TComplexArray3D,TComplexArray3D>::testFFT(E1_complex, E2_complex,
                                                               E3_complex, G1, E1_restored,
                                                               G2, E2_restored, G3,
                                                               E3_restored, t, x, y);
    if (res != SUCCESS) {
        return false;
    }
    else {
        std::cout << "FFT tests for complex initial signal completed successfully!"
                  << std::endl;
    }

    return true;
}

} // namespace fft_test
} // namespace simple_fft

#endif // __SIMPLE_FFT__UNIT_TESTS__TEST_FFT_HPP__


================================================
FILE: unit-tests/test_with_armadillo_matrix_and_row.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#undef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <armadillo>

using namespace arma;

namespace simple_fft {
namespace fft_test {

int testArmadillo()
{
    std::cout << "Testing FFT algorithms with Armadillo C++" << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x;
    makeGridsForPulse(t, x);

    // typedefing arrays
    typedef Row<real_type> RealArray1D;
    typedef Row<complex_type> ComplexArray1D;
    typedef Mat<real_type> RealArray2D;
    typedef Mat<complex_type> ComplexArray2D;

    // 1D fields and spectrum
    RealArray1D E1_real(nt);
    ComplexArray1D E1_complex(nt), G1(nt), E1_restored(nt);

    // 2D fields and spectrum
    RealArray2D E2_real(nt, nx);
    ComplexArray2D E2_complex(nt, nx), G2(nt, nx), E2_restored(nt, nx);

    if (!commonPartsForTests(E1_real, E2_real, E1_complex, E2_complex, G1, G2,
                             E1_restored, E2_restored, t, x))
    {
        std::cout << "Tests of FFT algorithms with Armadillo C++ matrix and row "
                  << "returned with errors!" << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with Armadillo C++ matrix and row completed successfully!"
              << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_blitz.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#undef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <blitz/array.h>

namespace simple_fft {
namespace fft_test {

int testBlitz()
{
    std::cout << "Testing FFT algorithms with blitz" << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x, y;
    makeGridsForPulse3D(t, x, y);

    // typedefing arrays
    typedef blitz::Array<real_type,int(1)> RealArray1D;
    typedef blitz::Array<complex_type,int(1)> ComplexArray1D;
    typedef blitz::Array<real_type,int(2)> RealArray2D;
    typedef blitz::Array<complex_type,int(2)> ComplexArray2D;
    typedef blitz::Array<real_type,int(3)> RealArray3D;
    typedef blitz::Array<complex_type,int(3)> ComplexArray3D;

    // 1D fields and spectrum
    RealArray1D E1_real(nt);
    ComplexArray1D E1_complex(nt), G1(nt), E1_restored(nt);

    // 2D fields and spectrum,
    RealArray2D E2_real(nt,nx);
    ComplexArray2D E2_complex(nt,nx), G2(nt,nx), E2_restored(nt,nx);

    // 3D fields and spectrum
    RealArray3D E3_real(nt,nx,ny);
    ComplexArray3D E3_complex(nt,nx,ny), G3(nt,nx,ny), E3_restored(nt,nx,ny);

    if (!commonPartsForTests3D(E1_real, E2_real, E3_real, E1_complex, E2_complex,
                               E3_complex, G1, G2, G3, E1_restored, E2_restored,
                               E3_restored, t, x, y))
    {
        std::cout << "Tests of FFT with blitz++ arrays returned with errors!" << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with blitz++ arrays completed successfully!" << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_boost_multiarray.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

// boost::multi_array uses square brackets for indices
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#define __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <boost/multi_array.hpp>

namespace simple_fft {
namespace fft_test {

int testBoostMultiArray()
{
    std::cout << "Testing FFT algorithms with boost::multi_array" << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x, y;
    makeGridsForPulse3D(t, x, y);

    // typedefing arrays
    typedef boost::multi_array<real_type,1> RealArray1D;
    typedef boost::multi_array<complex_type,1> ComplexArray1D;
    typedef boost::multi_array<real_type,2> RealArray2D;
    typedef boost::multi_array<complex_type,2> ComplexArray2D;
    typedef boost::multi_array<real_type,3> RealArray3D;
    typedef boost::multi_array<complex_type,3> ComplexArray3D;

    // 1D fields and spectrum
    RealArray1D E1_real(boost::extents[nt]);
    ComplexArray1D E1_complex(boost::extents[nt]), G1(boost::extents[nt]),
                   E1_restored(boost::extents[nt]);

    // 2D fields and spectrum
    RealArray2D E2_real(boost::extents[nt][nx]);
    ComplexArray2D E2_complex(boost::extents[nt][nx]), G2(boost::extents[nt][nx]),
                   E2_restored(boost::extents[nt][nx]);

    // 3D fields and spectrum
    RealArray3D E3_real(boost::extents[nt][nx][ny]);
    ComplexArray3D E3_complex(boost::extents[nt][nx][ny]), G3(boost::extents[nt][nx][ny]),
                   E3_restored(boost::extents[nt][nx][ny]);

    if (!commonPartsForTests3D(E1_real, E2_real, E3_real, E1_complex, E2_complex,
                               E3_complex, G1, G2, G3, E1_restored, E2_restored,
                               E3_restored, t, x, y))
    {
        std::cout << "Tests of FFT with boost::multi_array returned with errors!"
                  << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with boost::multi_array completed successfully!" << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_boost_ublas_vector_matrix.cpp
================================================
#include"../include/simple_fft/fft_settings.h"

#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#undef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp>

namespace simple_fft {
namespace fft_test {

int testBoostUblas()
{
    std::cout << "Testing FFT algorithms with boost::numerics::ublas vector and matrix"
              << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x;
    makeGridsForPulse(t, x);

    // typedefing arrays
    typedef boost::numeric::ublas::vector<real_type> RealArray1D;
    typedef boost::numeric::ublas::vector<complex_type> ComplexArray1D;
    typedef boost::numeric::ublas::matrix<real_type> RealArray2D;
    typedef boost::numeric::ublas::matrix<complex_type> ComplexArray2D;

    // 1D fields and spectrum
    RealArray1D E1_real(nt);
    ComplexArray1D E1_complex(nt), G1(nt), E1_restored(nt);

    // 2D fields and spectrum
    RealArray2D E2_real(nt, nx);
    ComplexArray2D E2_complex(nt, nx), G2(nt, nx), E2_restored(nt, nx);

    if (!commonPartsForTests(E1_real, E2_real, E1_complex, E2_complex,
                             G1, G2, E1_restored, E2_restored, t, x))
    {
        std::cout << "Tests of FFT with boost::numeric::ublas vector and matrix "
                  << "returned with errors!" << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with boost::numeric::ublas vector and matrix "
              << "completed successfully!" << std::endl;
    return SUCCESS;

}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_eigen_vector_matrix.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#undef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <eigen3/Eigen/Eigen>

namespace simple_fft {
namespace fft_test {

int testEigen()
{
    std::cout << "Testing FFT algorithms with Eigen 3.x vector and matrix"
              << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x;
    makeGridsForPulse(t, x);

    // typedefing arrays
    typedef Eigen::Matrix<real_type, Eigen::Dynamic, 1> RealArray1D;
    typedef Eigen::Matrix<complex_type, Eigen::Dynamic, 1> ComplexArray1D;
    typedef Eigen::Matrix<real_type, Eigen::Dynamic, Eigen::Dynamic> RealArray2D;
    typedef Eigen::Matrix<complex_type, Eigen::Dynamic, Eigen::Dynamic> ComplexArray2D;

    // 1D fields and spectrum
    RealArray1D E1_real(nt);
    ComplexArray1D E1_complex(nt), G1(nt), E1_restored(nt);

    // 2D fields and spectrum
    RealArray2D E2_real(nt, nx);
    ComplexArray2D E2_complex(nt, nx), G2(nt, nx), E2_restored(nt, nx);

    if (!commonPartsForTests(E1_real, E2_real, E1_complex, E2_complex,
                             G1, G2, E1_restored, E2_restored, t, x))
    {
        std::cout << "Tests of FFT with Eigen 3.x vector and matrix "
                  << "returned with errors!" << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with Eigen 3.x vector and matrix "
              << "completed successfully!" << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_marray.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#undef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <marray/marray.hxx>

namespace simple_fft {
namespace fft_test {

int testMarray()
{
    std::cout << "Testing FFT algorithms with marray" << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x, y;
    makeGridsForPulse3D(t, x, y);

    // typedefing arrays
    typedef marray::Vector<real_type> RealArray1D;
    typedef marray::Vector<complex_type> ComplexArray1D;
    typedef marray::Matrix<real_type> RealArray2D;
    typedef marray::Matrix<complex_type> ComplexArray2D;
    typedef marray::Marray<real_type> RealArray3D;
    typedef marray::Marray<complex_type> ComplexArray3D;

    // 1D fields and spectrum
    RealArray1D E1_real(marray::SkipInitialization, nt);
    ComplexArray1D E1_complex(marray::SkipInitialization, nt),
                   G1(marray::SkipInitialization, nt),
                   E1_restored(marray::SkipInitialization, nt);

    // 2D fields and spectrum
    RealArray2D E2_real(marray::SkipInitialization, nt, nx);
    ComplexArray2D E2_complex(marray::SkipInitialization, nt, nx),
                   G2(marray::SkipInitialization, nt, nx),
                   E2_restored(marray::SkipInitialization, nt, nx);

    // 3D fields and spectrum
    size_t shape3d[] = {nt, nx, ny};
    RealArray3D E3_real(marray::SkipInitialization, shape3d, shape3d + 3);
    ComplexArray3D E3_complex(marray::SkipInitialization, shape3d, shape3d + 3),
                   G3(marray::SkipInitialization, shape3d, shape3d + 3),
                   E3_restored(marray::SkipInitialization, shape3d, shape3d + 3);

    if (!commonPartsForTests3D(E1_real, E2_real, E3_real, E1_complex, E2_complex,
                               E3_complex, G1, G2, G3, E1_restored, E2_restored,
                               E3_restored, t, x, y))
    {
        std::cout << "Tests of FFT with marray returned with errors!" << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with marray completed successfully!" << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_native_cpp_pointer_based_arrays.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

// Native C++ arrays use square brackets for element access operator
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#define __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>

namespace simple_fft {
namespace fft_test {

int testNativeArraysFFT()
{
    std::cout << "Testing FFT algorithms with native C++ arrays on heap using "
              << "pointers to operate them" << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x, y;
    makeGridsForPulse3D(t, x, y);

    // typedefing arrays
    typedef real_type* RealArray1D;
    typedef complex_type* ComplexArray1D;
    typedef real_type** RealArray2D;
    typedef complex_type** ComplexArray2D;
    typedef real_type*** RealArray3D;
    typedef complex_type*** ComplexArray3D;

    // 1D fields and spectrum
    RealArray1D E1_real = new real_type[nt];
    ComplexArray1D E1_complex = new complex_type[nt];
    ComplexArray1D G1 = new complex_type[nt];
    ComplexArray1D E1_restored = new complex_type[nt];

    // 2D fields and spectrum
    RealArray2D E2_real = new RealArray1D[nt];
    for(size_t i = 0; i < nt; ++i) {
        E2_real[i] = new real_type[nx];
    }

    ComplexArray2D E2_complex = new ComplexArray1D[nt];
    for(size_t i = 0; i < nt; ++i) {
        E2_complex[i] = new complex_type[nx];
    }

    ComplexArray2D G2 = new ComplexArray1D[nt];
    for(size_t i = 0; i < nt; ++i) {
        G2[i] = new complex_type[nx];
    }

    ComplexArray2D E2_restored = new ComplexArray1D[nt];
    for(size_t i = 0; i < nt; ++i) {
        E2_restored[i] = new complex_type[nx];
    }

    // 3D fields and spectrum
    RealArray3D E3_real = new RealArray2D[nt];
    for(size_t i = 0; i < nt; ++i) {
        E3_real[i] = new RealArray1D[nx];
        for(size_t j = 0; j < nx; ++j) {
            E3_real[i][j] = new real_type[ny];
        }
    }

    ComplexArray3D E3_complex = new ComplexArray2D[nt];
    for(size_t i = 0; i < nt; ++i) {
        E3_complex[i] = new ComplexArray1D[nx];
        for(size_t j = 0; j < nx; ++j) {
            E3_complex[i][j] = new complex_type[ny];
        }
    }

    ComplexArray3D G3 = new ComplexArray2D[nt];
    for(size_t i = 0; i < nt; ++i) {
        G3[i] = new ComplexArray1D[nx];
        for(size_t j = 0; j < nx; ++j) {
            G3[i][j] = new complex_type[ny];
        }
    }

    ComplexArray3D E3_restored = new ComplexArray2D[nt];
    for(size_t i = 0; i < nt; ++i) {
        E3_restored[i] = new ComplexArray1D[nx];
        for(size_t j = 0; j < nx; ++j) {
            E3_restored[i][j] = new complex_type[ny];
        }
    }

    if (!commonPartsForTests3D(E1_real, E2_real, E3_real, E1_complex, E2_complex,
                               E3_complex, G1, G2, G3, E1_restored, E2_restored,
                               E3_restored, t, x, y))
    {
        std::cout << "Tests of FFT with native C++ pointer-based arrays on heap "
                  << "returned with errors!" << std::endl;
        return FAILURE;
    }

    // free 3D arrays
    for(size_t j = 0; j < nx; ++j) {
        for(size_t i = 0; i < nt; ++i) {
            delete[] E3_restored[i];
            E3_restored[i] = NULL;

            delete[] G3[i];
            G3[i] = NULL;

            delete[] E3_complex[i];
            E3_complex[i] = NULL;

            delete[] E3_real[i];
            E3_real[i] = NULL;
        }

        delete[] E3_restored[j];
        E3_restored[j] = NULL;

        delete[] G3[j];
        G3[j] = NULL;

        delete[] E3_complex[j];
        E3_complex[j] = NULL;

        delete[] E3_real[j];
        E3_real[j] = 0;
    }

    delete[] E3_restored;
    delete[] G3;
    delete[] E3_complex;
    delete[] E3_real;

    // free 2D arrays
    for(size_t i = 0; i < nt; ++i) {
        delete[] E2_restored[i];
        E2_restored[i] = NULL;

        delete[] G2[i];
        G2[i] = NULL;

        delete[] E2_complex[i];
        E2_complex[i] = NULL;

        delete[] E2_real[i];
        E2_real[i] = NULL;
    }

    delete[] E2_restored;
    delete[] G2;
    delete[] E2_complex;
    delete[] E2_real;

    // free 1D arrays
    delete[] E1_restored;
    delete[] G1;
    delete[] E1_complex;
    delete[] E1_real;

    std::cout << "Tests of FFT for native C++ pointer-based arrays on heap "
              << "completed successfully!" << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_std_vectors.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

// STL vectors use square brackets for element access operator
#ifndef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#define __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>

namespace simple_fft {
namespace fft_test {

// Runs the shared 1D/2D/3D FFT tests on std::vector and nested std::vector
// containers. Returns SUCCESS if commonPartsForTests3D passes, FAILURE
// otherwise.
int testStdVectorsFFT()
{
    using namespace pulse_params;

    typedef std::vector<real_type> RealVec;
    typedef std::vector<complex_type> ComplexVec;
    typedef std::vector<RealVec> RealMat;
    typedef std::vector<ComplexVec> ComplexMat;
    typedef std::vector<RealMat> RealCube;
    typedef std::vector<ComplexMat> ComplexCube;

    std::cout << "Testing FFT algorithms with std::vectors and vectors of vectors "
              << "as used array types" << std::endl;

    // Coordinate grids
    std::vector<real_type> t;
    std::vector<real_type> x;
    std::vector<real_type> y;
    makeGridsForPulse3D(t, x, y);

    // 1D: real input, complex input, spectrum, restored field
    RealVec E1_real(nt);
    ComplexVec E1_complex(nt);
    ComplexVec G1(nt);
    ComplexVec E1_restored(nt);

    // 2D: nt rows of nx value-initialized elements, built from a prototype row
    const RealVec real_row(nx);
    const ComplexVec complex_row(nx);
    RealMat E2_real(nt, real_row);
    ComplexMat E2_complex(nt, complex_row);
    ComplexMat G2(nt, complex_row);
    ComplexMat E2_restored(nt, complex_row);

    // 3D: nt planes of nx x ny value-initialized elements, built from a
    // prototype plane
    const RealMat real_plane(nx, RealVec(ny));
    const ComplexMat complex_plane(nx, ComplexVec(ny));
    RealCube E3_real(nt, real_plane);
    ComplexCube E3_complex(nt, complex_plane);
    ComplexCube G3(nt, complex_plane);
    ComplexCube E3_restored(nt, complex_plane);

    const bool passed = commonPartsForTests3D(E1_real, E2_real, E3_real,
                                              E1_complex, E2_complex, E3_complex,
                                              G1, G2, G3,
                                              E1_restored, E2_restored, E3_restored,
                                              t, x, y);
    if (passed) {
        std::cout << "Tests of FFT with std vectors completed successfully!" << std::endl;
        return SUCCESS;
    }

    std::cout << "Tests of FFT with std vectors returned with errors!"
              << std::endl;
    return FAILURE;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/test_with_stlsoft.cpp
================================================
#include "../include/simple_fft/fft_settings.h"

#ifdef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#undef __USE_SQUARE_BRACKETS_FOR_ELEMENT_ACCESS_OPERATOR
#endif

#include "../include/simple_fft/fft.h"
#include "test_fft.h"
#include <iostream>
#include <stlsoft/containers/fixed_array.hpp>

namespace simple_fft {
namespace fft_test {

int testStlSoft()
{
    std::cout << "Testing FFT algorithms with STLSoft fixed arrays" << std::endl;

    using namespace pulse_params;

    std::vector<real_type> t, x, y;
    makeGridsForPulse3D(t, x, y);

    // typedefing arrays
    typedef stlsoft::fixed_array_1d<real_type> RealArray1D;
    typedef stlsoft::fixed_array_1d<complex_type> ComplexArray1D;
    typedef stlsoft::fixed_array_2d<real_type> RealArray2D;
    typedef stlsoft::fixed_array_2d<complex_type> ComplexArray2D;
    typedef stlsoft::fixed_array_3d<real_type> RealArray3D;
    typedef stlsoft::fixed_array_3d<complex_type> ComplexArray3D;

    // 1D fields and spectrum
    RealArray1D E1_real(nt);
    ComplexArray1D E1_complex(nt), G1(nt), E1_restored(nt);

    // 2D fields and spectrum
    RealArray2D E2_real(nt,nx);
    ComplexArray2D E2_complex(nt,nx), G2(nt,nx), E2_restored(nt,nx);

    // 3D fields and spectrum
    RealArray3D E3_real(nt,nx,ny);
    ComplexArray3D E3_complex(nt,nx,ny), G3(nt,nx,ny), E3_restored(nt,nx,ny);

    if (!commonPartsForTests3D(E1_real, E2_real, E3_real, E1_complex, E2_complex,
                               E3_complex, G1, G2, G3, E1_restored, E2_restored,
                               E3_restored, t, x, y))
    {
        std::cout << "Tests of FFT with STLSoft fixed_arrays returned with errors!" << std::endl;
        return FAILURE;
    }

    std::cout << "Tests of FFT with STLSoft fixed_arrays completed successfully!" << std::endl;
    return SUCCESS;
}

} // namespace fft_test
} // namespace simple_fft


================================================
FILE: unit-tests/unit_tests_main.cpp
================================================
#include "test_fft.h"
#include <iostream>

#ifdef __USE_OPENMP
#include <omp.h>
#endif

using namespace simple_fft::fft_test;

/**
 * Entry point of the unit-test executable.
 *
 * Runs every test suite enabled at compile time, in a fixed order, and stops
 * at the first suite that returns a non-zero code.
 *
 * @return 0 when all enabled suites return 0, otherwise the code returned by
 *         the first failing suite.
 */
int main()
{
#ifdef __USE_OPENMP
    // Disable dynamic thread adjustment and pin the thread count to one.
    omp_set_dynamic(0);
    omp_set_num_threads(1);
#endif

    // Each later suite runs only while every previous one returned 0.
    int status = testStdVectorsFFT();

    if (status == 0) {
        status = testNativeArraysFFT();
    }

#ifdef HAS_BOOST_PACKAGE

#ifdef HAS_BOOST_MULTI_ARRAY
    if (status == 0) {
        status = testBoostMultiArray();
    }
#endif // HAS_BOOST_MULTI_ARRAY

#ifdef HAS_BOOST_UBLAS
    if (status == 0) {
        status = testBoostUblas();
    }
#endif // HAS_BOOST_UBLAS

#endif // HAS_BOOST_PACKAGE

#ifdef HAS_EIGEN
    if (status == 0) {
        status = testEigen();
    }
#endif // HAS_EIGEN

#ifdef HAS_MARRAY
    if (status == 0) {
        status = testMarray();
    }
#endif // HAS_MARRAY

#ifdef HAS_ARMADILLO
    if (status == 0) {
        status = testArmadillo();
    }
#endif // HAS_ARMADILLO

#ifdef HAS_BLITZ
    if (status == 0) {
        status = testBlitz();
    }
#endif // HAS_BLITZ

#ifdef HAS_STLSOFT
    if (status == 0) {
        status = testStlSoft();
    }
#endif // HAS_STLSOFT

    return status;
}