Repository: HiFi-LoFi/AudioFFT Branch: master Commit: 0893b532dd35 Files: 5 Total size: 53.0 KB Directory structure: gitextract_iwcc4ep1/ ├── AudioFFT.cpp ├── AudioFFT.h ├── COPYING.txt ├── README.md └── test/ └── Test.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: AudioFFT.cpp ================================================ // ================================================================================== // Copyright (c) 2017 HiFi-LoFi // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is furnished // to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// ================================================================================== #include "AudioFFT.h" #include #include #include #if defined(AUDIOFFT_INTEL_IPP) #define AUDIOFFT_INTEL_IPP_USED #include #elif defined(AUDIOFFT_APPLE_ACCELERATE) #define AUDIOFFT_APPLE_ACCELERATE_USED #include #include #elif defined (AUDIOFFT_FFTW3) #define AUDIOFFT_FFTW3_USED #include #else #if !defined(AUDIOFFT_OOURA) #define AUDIOFFT_OOURA #endif #define AUDIOFFT_OOURA_USED #include #endif namespace audiofft { namespace detail { class AudioFFTImpl { public: AudioFFTImpl() = default; AudioFFTImpl(const AudioFFTImpl&) = delete; AudioFFTImpl& operator=(const AudioFFTImpl&) = delete; virtual ~AudioFFTImpl() = default; virtual void init(size_t size) = 0; virtual void fft(const float* data, float* re, float* im) = 0; virtual void ifft(float* data, const float* re, const float* im) = 0; }; constexpr bool IsPowerOf2(size_t val) { return (val == 1 || (val & (val-1)) == 0); } template void ConvertBuffer(TypeDest* dest, const TypeSrc* src, size_t len) { for (size_t i=0; i(src[i]); } } template void ScaleBuffer(TypeDest* dest, const TypeSrc* src, const TypeFactor factor, size_t len) { for (size_t i=0; i(static_cast(src[i]) * factor); } } } // End of namespace detail // ================================================================ #ifdef AUDIOFFT_OOURA_USED /** * @internal * @class OouraFFT * @brief FFT implementation based on the great radix-4 routines by Takuya Ooura */ class OouraFFT : public detail::AudioFFTImpl { public: OouraFFT() : detail::AudioFFTImpl(), _size(0), _ip(), _w(), _buffer() { } OouraFFT(const OouraFFT&) = delete; OouraFFT& operator=(const OouraFFT&) = delete; virtual void init(size_t size) override { if (_size != size) { _ip.resize(2 + static_cast(std::sqrt(static_cast(size)))); _w.resize(size / 2); _buffer.resize(size); _size = size; const int size4 = static_cast(_size) / 4; makewt(size4, _ip.data(), _w.data()); makect(size4, _ip.data(), _w.data() + size4); } } 
virtual void fft(const float* data, float* re, float* im) override { // Convert into the format as required by the Ooura FFT detail::ConvertBuffer(_buffer.data(), data, _size); rdft(static_cast(_size), +1, _buffer.data(), _ip.data(), _w.data()); // Convert back to split-complex { double* b = _buffer.data(); double* bEnd = b + _size; float *r = re; float *i = im; while (b != bEnd) { *(r++) = static_cast(*(b++)); *(i++) = static_cast(-(*(b++))); } } const size_t size2 = _size / 2; re[size2] = -im[0]; im[0] = 0.0; im[size2] = 0.0; } virtual void ifft(float* data, const float* re, const float* im) override { // Convert into the format as required by the Ooura FFT { double* b = _buffer.data(); double* bEnd = b + _size; const float *r = re; const float *i = im; while (b != bEnd) { *(b++) = static_cast(*(r++)); *(b++) = -static_cast(*(i++)); } _buffer[1] = re[_size / 2]; } rdft(static_cast(_size), -1, _buffer.data(), _ip.data(), _w.data()); // Convert back to split-complex detail::ScaleBuffer(data, _buffer.data(), 2.0 / static_cast(_size), _size); } private: size_t _size; std::vector _ip; std::vector _w; std::vector _buffer; void rdft(int n, int isgn, double *a, int *ip, double *w) { int nw = ip[0]; int nc = ip[1]; if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } double xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } } /* -------- initializing routines -------- */ void makewt(int nw, int *ip, double *w) { int j, nwh; double delta, x, y; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; w[0] = 1; w[1] = 0; w[nwh] = cos(delta * nwh); w[nwh + 1] = w[nwh]; if (nwh > 2) { for (j = 2; j < nwh; j += 2) { x = cos(delta * j); y = sin(delta * j); w[j] = x; w[j + 1] = y; w[nw - j] = y; w[nw - j + 1] = 
x; } bitrv2(nw, ip + 2, w); } } } void makect(int nc, int *ip, double *c) { int j, nch; double delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = cos(delta * nch); c[nch] = 0.5 * c[0]; for (j = 1; j < nch; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ void bitrv2(int n, int *ip, double *a) { int j, j1, k, k1, l, m, m2; double xr, xi, yr, yi; ip[0] = 0; l = n; m = 1; while ((m << 3) < l) { l >>= 1; for (j = 0; j < m; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } m2 = 2 * m; if ((m << 3) == l) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 -= m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } j1 = 2 * k + m2 + ip[k]; k1 = j1 + m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } else { for (k = 1; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } } } void cftfsub(int n, double *a, double *w) { int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n > 8) { cft1st(n, a, w); l = 8; while ((l << 2) < n) { cftmdl(n, l, a, w); l <<= 2; } } if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; 
j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = a[j + 1] - a[j1 + 1]; a[j] += a[j1]; a[j + 1] += a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cftbsub(int n, double *a, double *w) { int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n > 8) { cft1st(n, a, w); l = 8; while ((l << 2) < n) { cftmdl(n, l, a, w); l <<= 2; } } if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i + x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i - x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i + x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = -a[j + 1] + a[j1 + 1]; a[j] += a[j1]; a[j + 1] = -a[j + 1] - a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cft1st(int n, double *a, double *w) { int j, k1, k2; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[2]; x0i = a[1] + a[3]; x1r = a[0] - a[2]; x1i = a[1] - a[3]; x2r = a[4] + a[6]; x2i = a[5] + a[7]; x3r = a[4] - a[6]; x3i = a[5] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[4] = x0r - x2r; a[5] = x0i - x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; wk1r = w[2]; x0r = a[8] + a[10]; x0i = a[9] + a[11]; x1r = a[8] - a[10]; x1i = a[9] - a[11]; x2r = a[12] + a[14]; x2i = a[13] + a[15]; x3r = a[12] - a[14]; 
x3i = a[13] - a[15]; a[8] = x0r + x2r; a[9] = x0i + x2i; a[12] = x2i - x0i; a[13] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[10] = wk1r * (x0r - x0i); a[11] = wk1r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[14] = wk1r * (x0i - x0r); a[15] = wk1r * (x0i + x0r); k1 = 0; for (j = 16; j < n; j += 16) { k1 += 2; k2 = 2 * k1; wk2r = w[k1]; wk2i = w[k1 + 1]; wk1r = w[k2]; wk1i = w[k2 + 1]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; x0r = a[j] + a[j + 2]; x0i = a[j + 1] + a[j + 3]; x1r = a[j] - a[j + 2]; x1i = a[j + 1] - a[j + 3]; x2r = a[j + 4] + a[j + 6]; x2i = a[j + 5] + a[j + 7]; x3r = a[j + 4] - a[j + 6]; x3i = a[j + 5] - a[j + 7]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j + 4] = wk2r * x0r - wk2i * x0i; a[j + 5] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j + 2] = wk1r * x0r - wk1i * x0i; a[j + 3] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j + 6] = wk3r * x0r - wk3i * x0i; a[j + 7] = wk3r * x0i + wk3i * x0r; wk1r = w[k2 + 2]; wk1i = w[k2 + 3]; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; x0r = a[j + 8] + a[j + 10]; x0i = a[j + 9] + a[j + 11]; x1r = a[j + 8] - a[j + 10]; x1i = a[j + 9] - a[j + 11]; x2r = a[j + 12] + a[j + 14]; x2i = a[j + 13] + a[j + 15]; x3r = a[j + 12] - a[j + 14]; x3i = a[j + 13] - a[j + 15]; a[j + 8] = x0r + x2r; a[j + 9] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j + 12] = -wk2i * x0r - wk2r * x0i; a[j + 13] = -wk2i * x0i + wk2r * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j + 10] = wk1r * x0r - wk1i * x0i; a[j + 11] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j + 14] = wk3r * x0r - wk3i * x0i; a[j + 15] = wk3r * x0i + wk3i * x0r; } } void cftmdl(int n, int l, double *a, double *w) { int j, j1, j2, j3, k, k1, k2, m, m2; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; m = l << 2; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] 
+ a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } wk1r = w[2]; for (j = m; j < l + m; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x2i - x0i; a[j2 + 1] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * (x0r - x0i); a[j1 + 1] = wk1r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[j3] = wk1r * (x0i - x0r); a[j3 + 1] = wk1r * (x0i + x0r); } k1 = 0; m2 = 2 * m; for (k = m2; k < n; k += m2) { k1 += 2; k2 = 2 * k1; wk2r = w[k1]; wk2i = w[k1 + 1]; wk1r = w[k2]; wk1i = w[k2 + 1]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j < l + k; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2] = wk2r * x0r - wk2i * x0i; a[j2 + 1] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } wk1r = w[k2 + 2]; wk1i = w[k2 + 3]; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; for (j = k + m; j < l + (k + m); j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 
1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2] = -wk2i * x0r - wk2r * x0i; a[j2 + 1] = -wk2i * x0i + wk2r * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } } } void rftfsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void rftbsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; a[1] = -a[1]; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] = yi - a[j + 1]; a[k] += yr; a[k + 1] = yi - a[k + 1]; } a[m + 1] = -a[m + 1]; } }; /** * @internal * @brief Concrete FFT implementation */ typedef OouraFFT AudioFFTImplementation; #endif // AUDIOFFT_OOURA_USED // ================================================================ #ifdef AUDIOFFT_INTEL_IPP_USED /** * @internal * @class IntelIppFFT * @brief FFT implementation using the Intel Integrated Performance Primitives */ class IntelIppFFT : public detail::AudioFFTImpl { public: IntelIppFFT() : detail::AudioFFTImpl(), _size(0), _operationalBufferSize(0), _powerOf2(0), _fftSpec(nullptr), _fftSpecBuf(0), _fftWorkBuf(0), _operationalBuffer(nullptr) { ippInit(); } IntelIppFFT(const IntelIppFFT&) = delete; IntelIppFFT& operator=(const IntelIppFFT&) = delete; virtual ~IntelIppFFT() { init(0); } virtual void init(size_t size) 
override { if (_fftSpec) { if (_fftWorkBuf) ippFree(_fftWorkBuf); if (_fftSpecBuf) ippFree(_fftSpecBuf); ippFree(_operationalBuffer); _size = 0; _operationalBufferSize = 0; _powerOf2 = 0; _fftSpec = 0; } if (size > 0) { _size = size; _operationalBufferSize = _size + 2; _powerOf2 = (int)(log((double)_size)/log(2.0)); // Query to get buffer sizes int sizeFFTSpec, sizeFFTInitBuf, sizeFFTWorkBuf; ippsFFTGetSize_R_32f( _powerOf2, IPP_FFT_NODIV_BY_ANY, ippAlgHintAccurate, &sizeFFTSpec, &sizeFFTInitBuf, &sizeFFTWorkBuf ); Ipp8u* fftInitBuf; // init buffers _fftSpecBuf = ippsMalloc_8u(sizeFFTSpec); _fftWorkBuf = ippsMalloc_8u(sizeFFTWorkBuf); fftInitBuf = ippsMalloc_8u(sizeFFTInitBuf); // Initialize FFT ippsFFTInit_R_32f( &_fftSpec, _powerOf2, IPP_FFT_NODIV_BY_ANY, ippAlgHintAccurate, _fftSpecBuf, fftInitBuf ); if (fftInitBuf) ippFree(fftInitBuf); // init operational buffer _operationalBuffer = ippsMalloc_32f( _operationalBufferSize ); } } virtual void fft(const float* data, float* re, float* im) override { size_t complexNumbersCount = _operationalBufferSize / 2; ippsFFTFwd_RToCCS_32f( data, _operationalBuffer, _fftSpec, _fftWorkBuf ); // no need to scale size_t complexCounter = 0; for (int i = 0; i < complexNumbersCount; ++i) { re[i] = _operationalBuffer[complexCounter++]; im[i] = _operationalBuffer[complexCounter++]; } } virtual void ifft(float* data, const float* re, const float* im) override { size_t complexNumbersCount = _operationalBufferSize / 2; size_t complexCounter = 0; for (int i = 0; i < complexNumbersCount; ++i) { _operationalBuffer[complexCounter++] = re[i]; _operationalBuffer[complexCounter++] = im[i]; } ippsFFTInv_CCSToR_32f( _operationalBuffer, data, _fftSpec, _fftWorkBuf ); // scaling const float factor = 1.0f / static_cast(_size); ippsMulC_32f_I(factor, data, _size); } private: size_t _size; size_t _operationalBufferSize; size_t _powerOf2; IppsFFTSpec_R_32f* _fftSpec; Ipp8u* _fftSpecBuf; Ipp8u* _fftWorkBuf; Ipp32f* _operationalBuffer; }; /** * @internal 
* @brief Concrete FFT implementation */ typedef IntelIppFFT AudioFFTImplementation; #endif // AUDIOFFT_INTEL_IPP_USED // ================================================================ #ifdef AUDIOFFT_APPLE_ACCELERATE_USED /** * @internal * @class AppleAccelerateFFT * @brief FFT implementation using the Apple Accelerate framework internally */ class AppleAccelerateFFT : public detail::AudioFFTImpl { public: AppleAccelerateFFT() : detail::AudioFFTImpl(), _size(0), _powerOf2(0), _fftSetup(0), _re(), _im() { } AppleAccelerateFFT(const AppleAccelerateFFT&) = delete; AppleAccelerateFFT& operator=(const AppleAccelerateFFT&) = delete; virtual ~AppleAccelerateFFT() { init(0); } virtual void init(size_t size) override { if (_fftSetup) { vDSP_destroy_fftsetup(_fftSetup); _size = 0; _powerOf2 = 0; _fftSetup = 0; _re.clear(); _im.clear(); } if (size > 0) { _size = size; _powerOf2 = 0; while ((1 << _powerOf2) < _size) { ++_powerOf2; } _fftSetup = vDSP_create_fftsetup(_powerOf2, FFT_RADIX2); _re.resize(_size / 2); _im.resize(_size / 2); } } virtual void fft(const float* data, float* re, float* im) override { const size_t size2 = _size / 2; DSPSplitComplex splitComplex; splitComplex.realp = re; splitComplex.imagp = im; vDSP_ctoz(reinterpret_cast(data), 2, &splitComplex, 1, size2); vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_FORWARD); const float factor = 0.5f; vDSP_vsmul(re, 1, &factor, re, 1, size2); vDSP_vsmul(im, 1, &factor, im, 1, size2); re[size2] = im[0]; im[0] = 0.0f; im[size2] = 0.0f; } virtual void ifft(float* data, const float* re, const float* im) override { const size_t size2 = _size / 2; ::memcpy(_re.data(), re, size2 * sizeof(float)); ::memcpy(_im.data(), im, size2 * sizeof(float)); _im[0] = re[size2]; DSPSplitComplex splitComplex; splitComplex.realp = _re.data(); splitComplex.imagp = _im.data(); vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_INVERSE); vDSP_ztoc(&splitComplex, 1, reinterpret_cast(data), 2, size2); const float factor = 1.0f 
/ static_cast(_size); vDSP_vsmul(data, 1, &factor, data, 1, _size); } private: size_t _size; size_t _powerOf2; FFTSetup _fftSetup; std::vector _re; std::vector _im; }; /** * @internal * @brief Concrete FFT implementation */ typedef AppleAccelerateFFT AudioFFTImplementation; #endif // AUDIOFFT_APPLE_ACCELERATE_USED // ================================================================ #ifdef AUDIOFFT_FFTW3_USED /** * @internal * @class FFTW3FFT * @brief FFT implementation using FFTW3 internally (see fftw.org) */ class FFTW3FFT : public detail::AudioFFTImpl { public: FFTW3FFT() : detail::AudioFFTImpl(), _size(0), _complexSize(0), _planForward(0), _planBackward(0), _data(0), _re(0), _im(0) { } FFTW3FFT(const FFTW3FFT&) = delete; FFTW3FFT& operator=(const FFTW3FFT&) = delete; virtual ~FFTW3FFT() { init(0); } virtual void init(size_t size) override { if (_size != size) { if (_size > 0) { fftwf_destroy_plan(_planForward); fftwf_destroy_plan(_planBackward); _planForward = 0; _planBackward = 0; _size = 0; _complexSize = 0; if (_data) { fftwf_free(_data); _data = 0; } if (_re) { fftwf_free(_re); _re = 0; } if (_im) { fftwf_free(_im); _im = 0; } } if (size > 0) { _size = size; _complexSize = AudioFFT::ComplexSize(_size); const size_t complexSize = AudioFFT::ComplexSize(_size); _data = reinterpret_cast(fftwf_malloc(_size * sizeof(float))); _re = reinterpret_cast(fftwf_malloc(complexSize * sizeof(float))); _im = reinterpret_cast(fftwf_malloc(complexSize * sizeof(float))); fftw_iodim dim; dim.n = static_cast(size); dim.is = 1; dim.os = 1; _planForward = fftwf_plan_guru_split_dft_r2c(1, &dim, 0, 0, _data, _re, _im, FFTW_MEASURE); _planBackward = fftwf_plan_guru_split_dft_c2r(1, &dim, 0, 0, _re, _im, _data, FFTW_MEASURE); } } } virtual void fft(const float* data, float* re, float* im) override { ::memcpy(_data, data, _size * sizeof(float)); fftwf_execute_split_dft_r2c(_planForward, _data, _re, _im); ::memcpy(re, _re, _complexSize * sizeof(float)); ::memcpy(im, _im, _complexSize * 
sizeof(float)); } virtual void ifft(float* data, const float* re, const float* im) override { ::memcpy(_re, re, _complexSize * sizeof(float)); ::memcpy(_im, im, _complexSize * sizeof(float)); fftwf_execute_split_dft_c2r(_planBackward, _re, _im, _data); detail::ScaleBuffer(data, _data, 1.0f / static_cast(_size), _size); } private: size_t _size; size_t _complexSize; fftwf_plan _planForward; fftwf_plan _planBackward; float* _data; float* _re; float* _im; }; /** * @internal * @brief Concrete FFT implementation */ typedef FFTW3FFT AudioFFTImplementation; #endif // AUDIOFFT_FFTW3_USED // ============================================================= AudioFFT::AudioFFT() : _impl(new AudioFFTImplementation()) { } AudioFFT::~AudioFFT() { } void AudioFFT::init(size_t size) { assert(detail::IsPowerOf2(size)); _impl->init(size); } void AudioFFT::fft(const float* data, float* re, float* im) { _impl->fft(data, re, im); } void AudioFFT::ifft(float* data, const float* re, const float* im) { _impl->ifft(data, re, im); } size_t AudioFFT::ComplexSize(size_t size) { return (size / 2) + 1; } } // End of namespace ================================================ FILE: AudioFFT.h ================================================ // ================================================================================== // Copyright (c) 2017 HiFi-LoFi // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is furnished // to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // ================================================================================== #ifndef _AUDIOFFT_H #define _AUDIOFFT_H /** * AudioFFT provides real-to-complex/complex-to-real FFT routines. * * Features: * * - Real-complex FFT and complex-real inverse FFT for power-of-2-sized real data. * * - Uniform interface to different FFT implementations (currently Ooura, FFTW3 and Apple Accelerate). * * - Complex data is handled in "split-complex" format, i.e. there are separate * arrays for the real and imaginary parts which can be useful for SIMD optimizations * (split-complex arrays have to be of length (size/2+1) representing bins from DC * to Nyquist frequency). * * - Output is "ready to use" (all scaling etc. is already handled internally). * * - No allocations/deallocations after the initialization which makes it usable * for real-time audio applications (that's what I wrote it for and using it). * * * How to use it in your project: * * - Add the .h and .cpp file to your project - that's all. * * - To get extra speed, you can link FFTW3 to your project and define * AUDIOFFT_FFTW3 (however, please check whether your project suits the * according license). * * - To get the best speed on Apple platforms, you can link the Apple * Accelerate framework to your project and define * AUDIOFFT_APPLE_ACCELERATE (however, please check whether your * project suits the according license). 
* * * Remarks: * * - AudioFFT is not intended to be the fastest FFT, but to be a fast-enough * FFT suitable for most audio applications. * * - AudioFFT uses the quite liberal MIT license. * * * Example usage: * @code * #include "AudioFFT.h" * * void Example() * { * const size_t fftSize = 1024; // Needs to be power of 2! * * std::vector input(fftSize, 0.0f); * std::vector re(audiofft::AudioFFT::ComplexSize(fftSize)); * std::vector im(audiofft::AudioFFT::ComplexSize(fftSize)); * std::vector output(fftSize); * * audiofft::AudioFFT fft; * fft.init(1024); * fft.fft(input.data(), re.data(), im.data()); * fft.ifft(output.data(), re.data(), im.data()); * } * @endcode */ #include #include namespace audiofft { namespace detail { class AudioFFTImpl; } // ============================================================= /** * @class AudioFFT * @brief Performs 1D FFTs */ class AudioFFT { public: /** * @brief Constructor */ AudioFFT(); AudioFFT(const AudioFFT&) = delete; AudioFFT& operator=(const AudioFFT&) = delete; /** * @brief Destructor */ ~AudioFFT(); /** * @brief Initializes the FFT object * @param size Size of the real input (must be power 2) */ void init(size_t size); /** * @brief Performs the forward FFT * @param data The real input data (has to be of the length as specified in init()) * @param re The real part of the complex output (has to be of length as returned by ComplexSize()) * @param im The imaginary part of the complex output (has to be of length as returned by ComplexSize()) */ void fft(const float* data, float* re, float* im); /** * @brief Performs the inverse FFT * @param data The real output data (has to be of the length as specified in init()) * @param re The real part of the complex input (has to be of length as returned by ComplexSize()) * @param im The imaginary part of the complex input (has to be of length as returned by ComplexSize()) */ void ifft(float* data, const float* re, const float* im); /** * @brief Calculates the necessary size of the 
real/imaginary complex arrays * @param size The size of the real data * @return The size of the real/imaginary complex arrays */ static size_t ComplexSize(size_t size); private: std::unique_ptr _impl; }; /** * @deprecated * @brief Let's keep an AudioFFTBase type around for now because it has been here already in the 1st version in order to avoid breaking existing code. */ typedef AudioFFT AudioFFTBase; } // End of namespace #endif // Header guard ================================================ FILE: COPYING.txt ================================================ Copyright (c) 2017 HiFi-LoFi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ AudioFFT ======== AudioFFT provides real-to-complex/complex-to-real FFT routines. ## Features ## - Real-complex FFT and complex-real inverse FFT for power-of-2-sized real data. 
- Uniform interface to different FFT implementations (currently Ooura, FFTW3, Apple Accelerate and Intel IPP).

- Complex data is handled in "split-complex" format, i.e. there are separate
  arrays for the real and imaginary parts which can be useful for SIMD optimizations
  (split-complex arrays have to be of length (size/2+1) representing bins from DC
  to Nyquist frequency).

- Output is "ready to use" (all scaling etc. is already handled internally).

- No allocations/deallocations after the initialization which makes it usable
  for real-time audio applications (that's what I wrote it for and what I use it for).


## How to use it in your project ##

- Add the .h and .cpp file to your project - that's all.

- To get extra speed, you can link FFTW3 to your project and define
  AUDIOFFT_FFTW3 (however, please check whether your project suits the
  according license).

- To get the best speed on Apple platforms, you can link the Apple
  Accelerate framework to your project and define
  AUDIOFFT_APPLE_ACCELERATE.

- On any supported platform you can use Intel IPP's FFTs by linking to IPP
  and defining AUDIOFFT_INTEL_IPP.


## Remarks ##

- AudioFFT is not intended to be the fastest FFT, but to be a fast-enough
  FFT suitable for most audio applications.

- AudioFFT uses the quite liberal MIT license.


## Example usage ##

    #include "AudioFFT.h"

    void Example()
    {
      const size_t fftSize = 1024; // Needs to be power of 2!

      std::vector<float> input(fftSize, 0.0f);
      std::vector<float> re(audiofft::AudioFFT::ComplexSize(fftSize));
      std::vector<float> im(audiofft::AudioFFT::ComplexSize(fftSize));
      std::vector<float> output(fftSize);

      audiofft::AudioFFT fft;
      fft.init(1024);
      fft.fft(input.data(), re.data(), im.data());
      fft.ifft(output.data(), re.data(), im.data());
    }


## Benchmarks ##

The following tables show time measurements for forward/backward "FFTing" 512MB
of real data using the FFT input size as listed in the tables. AudioFFT was run
using Ooura, FFTW3 and vDSP (Apple Accelerate) internally.
For comparison and reference, the same setup was also used with the great KissFFT,
which is a quite popular FFT implementation, and which is also able to handle
non-power-of-2 sizes. :-)

#### CPU: Intel Core i5 (2.4 GHz) ####

- Mac OS X Lion 10.7.5
- Compiler: Apple LLVM 3.0 (/Os (fastest, smallest), SSE enabled)

| Size | Ooura | FFTW3 | Apple | KissFFT |
| ----:| -------:| -------:| -------:| -------:|
| 64 | 8.805s | 6.914s | 3.420s | 12.496s |
| 128 | 10.047s | 6.473s | 2.992s | 11.457s |
| 512 | 11.895s | 6.473s | 3.025s | 13.737s |
| 1024 | 12.956s | 6.932s | 3.139s | 17.050s |
| 4096 | 14.840s | 7.517s | 3.661s | 19.379s |

#### CPU: Intel Xeon (2.93 GHz) ####

- Windows 7
- Compiler: VC10 (/O2 /arch:SSE2 /fp:precise)

| Size | Ooura | FFTW3 | KissFFT |
| ----:| -------:| -------:| -------:|
| 64 | 7.267s | 4.625s | 20.819s |
| 128 | 7.583s | 5.494s | 20.822s |
| 512 | 8.608s | 5.346s | 24.812s |
| 1024 | 9.546s | 5.604s | 28.936s |
| 4096 | 11.026s | 6.265s | 33.160s |


================================================
FILE: test/Test.cpp
================================================
// ==================================================================================
// Copyright (c) 2013 HiFi-LoFi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is furnished
// to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // ================================================================================== #include #include #include #include #include "../AudioFFT.h" #define TEST_CORRECTNESS //#define TEST_PERFORMANCE //#define TEST_PERFORMANCE_KISSFFT template static bool CheckBuffers(size_t size, const TA a, const TB b, double tolerance = 0.001) { size_t numDiff = 0; for (size_t i=0; i(a[i]); const double bb = static_cast(b[i]); const double diff = ::fabs(aa-bb); if (diff > tolerance) { ++numDiff; } } return (numDiff == 0); } static void TestCorrectness(size_t inputSize, const double* refRe, const double* refIm) { bool success = true; std::vector input(inputSize); for (size_t i=0; i(i+1); } audiofft::AudioFFT fft; fft.init(inputSize); const size_t complexSize = audiofft::AudioFFT::ComplexSize(inputSize); std::vector re(complexSize); std::vector im(complexSize); fft.fft(&input[0], &re[0], &im[0]); success &= (CheckBuffers(complexSize, re, refRe) == true); success &= (CheckBuffers(complexSize, im, refIm) == true); std::vector backward(inputSize, -10000.0f); fft.ifft(&backward[0], &re[0], &im[0]); success &= (CheckBuffers(inputSize, backward, input) == true); printf("Correctness (input size %d) => %s\n", static_cast(inputSize), success ? 
"[OK]" : "[FAILED]"); } static void TestCorrectness() { { const double Re2[] = { 3.0000000,-1.0000000 }; const double Im2[] = { 0.00000000,0.00000000 }; TestCorrectness(2, Re2, Im2); } { const double Re4[] = { 10.000000,-2.0000000,-2.0000000 }; const double Im4[] = { 0.00000000,2.0000000,0.00000000 }; TestCorrectness(4, Re4, Im4); } { const double Re8[] = { 36.000000,-4.0000000,-4.0000000,-4.0000000,-4.0000000 }; const double Im8[] = { 0.00000000,9.6568546,4.0000000,1.6568543,0.00000000 }; TestCorrectness(8, Re8, Im8); } { const double Re16[] = { 136.00000,-8.0000000,-8.0000000,-8.0000000,-8.0000000,-8.0000000,-8.0000000,-8.0000000,-8.0000000 }; const double Im16[] = { 0.00000000,40.218716,19.313709,11.972846,8.0000000,5.3454289,3.3137085,1.5912989,0.00000000 }; TestCorrectness(16, Re16, Im16); } { const double Re32[] = { 528.00000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000, -16.000000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000,-16.000000 }; const double Im32[] = { 0.00000000,162.45073,80.437431,52.744930,38.627419,29.933895,23.945692,19.496056,16.000000,13.130860, 10.690858,8.5521784,6.6274171,4.8535471,3.1825979,1.5758624,0.00000000 }; TestCorrectness(32, Re32, Im32); } { const double Re64[] = { 2080.0000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000, -32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000, -32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000, -32.000000,-32.000000,-32.000000,-32.000000,-32.000000,-32.000000 }; const double Im64[] = { 0.00000000,651.37494,324.90146,215.72647,160.87486,127.75116,105.48986,89.434006,77.254837,67.658318, 59.867790,53.388775,47.891384,43.147007,38.992111,35.306561,32.000000,29.003109,26.261721,23.732817, 21.381716,19.180061,17.104357,15.134872,13.254834,11.449783,9.7070942,8.0155830,6.3651958,4.7467518, 
3.1517248,1.5720592,0.00000000 }; TestCorrectness(64, Re64, Im64); } { const double Re128[] = { 8256.0000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000,-64.000000, -64.000000,-64.000000 }; const double Im128[] = { 0.00000000,2607.0710,1302.7499,867.62683,649.80292,518.89832,431.45294,368.84109,321.74973,285.00494, 255.50232,231.26628,210.97972,193.73076,178.86801,165.91376,154.50967,144.38168,135.31664,127.14616, 119.73558,112.97580,106.77755,101.06705,95.782768,90.873016,86.294014,82.008423,77.984222,74.193787, 70.613121,67.221306,64.000000,60.933064,58.006218,55.206779,52.523441,49.946091,47.465633,45.073887, 42.763432,40.527554,38.360123,36.255550,34.208714,32.214893,30.269745,28.369249,26.509668,24.687525, 22.899567,21.142744,19.414188,17.711185,16.031166,14.371680,12.730392,11.105054,9.4935036,7.8936472, 6.3034496,4.7209234,3.1441183,1.5711118,0.00000000 }; TestCorrectness(128, Re128, Im128); } { const double Re256[] = { 32896.000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, 
-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000,-128.00000, -128.00000,-128.00000,-128.00000 }; const double Im256[] = { 0.00000000,10429.854,5214.1421,3475.2219,2605.4998,2083.4570,1735.2537,1486.3871,1299.6058,1154.2147, 1037.7966,942.44965,862.90588,795.51843,737.68219,687.48676,643.49945,604.62457,570.00989,538.98267, 511.00464,485.64011,462.53256,441.38748,421.95944,404.04230,387.46152,372.06857,357.73602,344.35406, 331.82751,320.07346,309.01935,298.60141,288.76337,279.45544,270.63327,262.25735,254.29233,246.70645, 239.47116,232.56065,225.95160,219.62283,213.55510,207.73085,202.13409,196.75014,191.56554,186.56796, 181.74603,177.08928,172.58803,168.23331,164.01685,159.93094,155.96844,152.12273,148.38757,144.75720, 141.22624,137.78961,134.44261,131.18079,128.00000,124.89634,121.86613,118.90591,116.01244,113.18262, 110.41356,107.70251,105.04688,102.44421,99.892181,97.388565,94.931267,92.518303,90.147774,87.817863, 85.526863,83.273132,81.055107,78.871284,76.720245,74.600624,72.511101,70.450439,68.417427,66.410912, 
64.429787,62.472988,60.539490,58.628311,56.738499,54.869133,53.019337,51.188248,49.375050,47.578934, 45.799133,44.034893,42.285488,40.550213,38.828377,37.119312,35.422371,33.736916,32.062332,30.398010, 28.743361,27.097807,25.460783,23.831732,22.210108,20.595375,18.987007,17.384483,15.787294,14.194933, 12.606899,11.022701,9.4418468,7.8638530,6.2882366,4.7145190,3.1422236,1.5708752,0.00000000 }; TestCorrectness(256, Re256, Im256); } } static void TestPerformance(const size_t inputSize) { const size_t overallSize = size_t(512) * size_t(1024) * size_t(1024); const size_t iterations = overallSize / inputSize; printf("Performance (processing %d MB, input size %d)", static_cast(overallSize / (1024 * 1024)), static_cast(inputSize)); std::vector input(inputSize); for (size_t i=0; i(i); } const size_t complexSize = audiofft::AudioFFT::ComplexSize(inputSize); std::vector re(complexSize); std::vector im(complexSize); std::vector backward(inputSize); { audiofft::AudioFFT fft; fft.init(inputSize); for (size_t i=0; i Completed!\n"); } #ifdef TEST_PERFORMANCE_KISSFFT #include "kiss_fftr.h" static void TestPerformanceKissFFT(const size_t inputSize) { const size_t overallSize = size_t(512) * size_t(1024) * size_t(1024); const size_t iterations = overallSize / inputSize; printf("Performance KissFFT (processing %d MB, input size %d)", static_cast(overallSize / (1024 * 1024)), static_cast(inputSize)); std::vector input(inputSize); for (size_t i=0; i(i); } std::vector complex(inputSize/2 + 1); std::vector backward(inputSize); kiss_fftr_cfg configForward = kiss_fftr_alloc(static_cast(inputSize), 0, 0, 0); kiss_fftr_cfg configBackward = kiss_fftr_alloc(static_cast(inputSize), 1, 0, 0); for (size_t i=0; i Completed!\n"); } #endif // TEST_PERFORMANCE_KISSFFT int main() { #ifdef TEST_CORRECTNESS TestCorrectness(); #endif #ifdef TEST_PERFORMANCE TestPerformance(64); TestPerformance(128); TestPerformance(512); TestPerformance(1024); TestPerformance(4096); #endif #ifdef TEST_PERFORMANCE_KISSFFT 
TestPerformanceKissFFT(64); TestPerformanceKissFFT(128); TestPerformanceKissFFT(512); TestPerformanceKissFFT(1024); TestPerformanceKissFFT(4096); #endif return 0; }