Chris@49: // This Source Code Form is a compilation of: Chris@49: // (1) source code written by Conrad Sanderson, and Chris@49: // (2) a modified form of source code referred to as "kissfft.hh". Chris@49: // Chris@49: // This compilation is Copyright (C) 2013 Conrad Sanderson Chris@49: // and is subject to the terms of the Mozilla Public License, v. 2.0. Chris@49: // Chris@49: // The source code that is distinct and separate from "kissfft.hh" Chris@49: // is Copyright (C) 2013 Conrad Sanderson and is subject to the Chris@49: // terms of the Mozilla Public License, v. 2.0. Chris@49: // Chris@49: // If a copy of the MPL was not distributed with this file, Chris@49: // You can obtain one at http://mozilla.org/MPL/2.0/. Chris@49: // Chris@49: // The original "kissfft.hh" source code is licensed under a 3-clause BSD license, Chris@49: // as follows: Chris@49: // Chris@49: // Copyright (c) 2003-2010 Mark Borgerding Chris@49: // Chris@49: // All rights reserved. Chris@49: // Chris@49: // Redistribution and use in source and binary forms, with or without modification, Chris@49: // are permitted provided that the following conditions are met: Chris@49: // Chris@49: // * Redistributions of source code must retain the above copyright notice, Chris@49: // this list of conditions and the following disclaimer. Chris@49: // Chris@49: // * Redistributions in binary form must reproduce the above copyright notice, Chris@49: // this list of conditions and the following disclaimer in the documentation Chris@49: // and/or other materials provided with the distribution. Chris@49: // Chris@49: // * Neither the author nor the names of any contributors may be used to endorse or promote Chris@49: // products derived from this software without specific prior written permission. Chris@49: // Chris@49: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS Chris@49: // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY Chris@49: // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER Chris@49: // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, Chris@49: // OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS Chris@49: // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON Chris@49: // ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE Chris@49: // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Chris@49: Chris@49: Chris@49: Chris@49: template struct store {}; Chris@49: Chris@49: template Chris@49: struct store Chris@49: { Chris@49: static const uword N = fixed_N; Chris@49: Chris@49: arma_aligned cx_type coeffs_array[fixed_N]; Chris@49: Chris@49: inline store() {} Chris@49: inline store(uword) {} Chris@49: Chris@49: arma_inline cx_type* coeffs_ptr() { return &coeffs_array[0]; } Chris@49: arma_inline const cx_type* coeffs_ptr() const { return &coeffs_array[0]; } Chris@49: }; Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: struct store Chris@49: { Chris@49: const uword N; Chris@49: Chris@49: podarray coeffs_array; Chris@49: Chris@49: inline store() : N(0) {} Chris@49: inline store(uword in_N) : N(in_N) { coeffs_array.set_size(N); } Chris@49: Chris@49: arma_inline cx_type* coeffs_ptr() { return coeffs_array.memptr(); } Chris@49: arma_inline const cx_type* coeffs_ptr() const { return coeffs_array.memptr(); } Chris@49: }; Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: class fft_engine : public store 0)> Chris@49: { Chris@49: public: Chris@49: Chris@49: typedef typename get_pod_type::result T; Chris@49: Chris@49: using store 0)>::N; Chris@49: using store 0)>::coeffs_ptr; Chris@49: Chris@49: podarray residue; Chris@49: podarray radix; Chris@49: Chris@49: podarray tmp_array; Chris@49: Chris@49: Chris@49: template Chris@49: inline Chris@49: uword Chris@49: calc_radix() Chris@49: { Chris@49: uword i = 0; Chris@49: Chris@49: for(uword n = N, r=4; n >= 2; ++i) Chris@49: { Chris@49: while( (n % r) > 0 ) Chris@49: { Chris@49: switch(r) Chris@49: { Chris@49: case 2: r = 3; break; Chris@49: case 4: r = 2; break; Chris@49: default: r += 2; break; Chris@49: } Chris@49: Chris@49: if(r*r > n) { r = n; } Chris@49: } Chris@49: Chris@49: n /= r; Chris@49: Chris@49: if(fill) Chris@49: { Chris@49: residue[i] = n; Chris@49: radix[i] = r; Chris@49: } Chris@49: } Chris@49: Chris@49: return i; Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: inline Chris@49: fft_engine(const uword in_N) Chris@49: : store< cx_type, fixed_N, (fixed_N > 0) >(in_N) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: const uword len = calc_radix(); Chris@49: Chris@49: residue.set_size(len); Chris@49: radix.set_size(len); Chris@49: Chris@49: calc_radix(); Chris@49: Chris@49: Chris@49: // calculate the constant coefficients Chris@49: Chris@49: cx_type* coeffs = coeffs_ptr(); Chris@49: Chris@49: const T k = T( (inverse) ? +2 : -2 ) * std::acos( T(-1) ) / T(N); Chris@49: Chris@49: for(uword i=0; i < N; ++i) { coeffs[i] = std::exp( cx_type(T(0), i*k) ); } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: butterfly_2(cx_type* Y, const uword stride, const uword m) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: const cx_type* coeffs = coeffs_ptr(); Chris@49: Chris@49: for(uword i=0; i < m; ++i) Chris@49: { Chris@49: const cx_type t = Y[i+m] * coeffs[i*stride]; Chris@49: Chris@49: Y[i+m] = Y[i] - t; Chris@49: Y[i ] += t; Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: butterfly_3(cx_type* Y, const uword stride, const uword m) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: arma_aligned cx_type tmp[5]; Chris@49: Chris@49: cx_type* coeffs1 = coeffs_ptr(); Chris@49: cx_type* coeffs2 = coeffs1; Chris@49: Chris@49: const T coeff_sm_imag = coeffs1[stride*m].imag(); Chris@49: Chris@49: const uword n = m*2; Chris@49: Chris@49: // TODO: rearrange the indices within tmp[] into a more sane order Chris@49: Chris@49: for(uword i = m; i > 0; --i) Chris@49: { Chris@49: tmp[1] = Y[m] * (*coeffs1); Chris@49: tmp[2] = Y[n] * (*coeffs2); Chris@49: Chris@49: tmp[0] = tmp[1] - tmp[2]; Chris@49: tmp[0] *= coeff_sm_imag; Chris@49: Chris@49: tmp[3] = tmp[1] + tmp[2]; Chris@49: Chris@49: Y[m] = cx_type( (Y[0].real() - (0.5*tmp[3].real())), (Y[0].imag() - (0.5*tmp[3].imag())) ); Chris@49: Chris@49: Y[0] += tmp[3]; Chris@49: Chris@49: Chris@49: Y[n] = cx_type( (Y[m].real() + tmp[0].imag()), (Y[m].imag() - tmp[0].real()) ); Chris@49: Chris@49: Y[m] += cx_type( -tmp[0].imag(), tmp[0].real() ); Chris@49: Chris@49: Y++; Chris@49: Chris@49: coeffs1 += stride; Chris@49: coeffs2 += stride*2; Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: butterfly_4(cx_type* Y, const uword stride, const uword m) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: arma_aligned cx_type tmp[7]; Chris@49: Chris@49: const cx_type* coeffs = coeffs_ptr(); Chris@49: Chris@49: const uword m2 = m*2; Chris@49: const uword m3 = m*3; Chris@49: Chris@49: // TODO: rearrange the indices within tmp[] into a more sane order Chris@49: Chris@49: for(uword i=0; i < m; ++i) Chris@49: { Chris@49: tmp[0] = Y[i + m ] * coeffs[i*stride ]; Chris@49: tmp[2] = Y[i + m3] * coeffs[i*stride*3]; Chris@49: tmp[3] = tmp[0] + tmp[2]; Chris@49: Chris@49: //tmp[4] = tmp[0] - tmp[2]; Chris@49: //tmp[4] = (inverse) ? cx_type( -(tmp[4].imag()), tmp[4].real() ) : cx_type( tmp[4].imag(), -tmp[4].real() ); Chris@49: Chris@49: tmp[4] = (inverse) Chris@49: ? cx_type( (tmp[2].imag() - tmp[0].imag()), (tmp[0].real() - tmp[2].real()) ) Chris@49: : cx_type( (tmp[0].imag() - tmp[2].imag()), (tmp[2].real() - tmp[0].real()) ); Chris@49: Chris@49: tmp[1] = Y[i + m2] * coeffs[i*stride*2]; Chris@49: tmp[5] = Y[i] - tmp[1]; Chris@49: Chris@49: Chris@49: Y[i ] += tmp[1]; Chris@49: Y[i + m2] = Y[i] - tmp[3]; Chris@49: Y[i ] += tmp[3]; Chris@49: Y[i + m ] = tmp[5] + tmp[4]; Chris@49: Y[i + m3] = tmp[5] - tmp[4]; Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: inline Chris@49: arma_hot Chris@49: void Chris@49: butterfly_5(cx_type* Y, const uword stride, const uword m) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: arma_aligned cx_type tmp[13]; Chris@49: Chris@49: const cx_type* coeffs = coeffs_ptr(); Chris@49: Chris@49: const T a_real = coeffs[stride*1*m].real(); Chris@49: const T a_imag = coeffs[stride*1*m].imag(); Chris@49: Chris@49: const T b_real = coeffs[stride*2*m].real(); Chris@49: const T b_imag = coeffs[stride*2*m].imag(); Chris@49: Chris@49: cx_type* Y0 = Y; Chris@49: cx_type* Y1 = Y + 1*m; Chris@49: cx_type* Y2 = Y + 2*m; Chris@49: cx_type* Y3 = Y + 3*m; Chris@49: cx_type* Y4 = Y + 4*m; Chris@49: Chris@49: for(uword i=0; i < m; ++i) Chris@49: { Chris@49: tmp[0] = (*Y0); Chris@49: Chris@49: tmp[1] = (*Y1) * coeffs[stride*1*i]; Chris@49: tmp[2] = (*Y2) * coeffs[stride*2*i]; Chris@49: tmp[3] = (*Y3) * coeffs[stride*3*i]; Chris@49: tmp[4] = (*Y4) * coeffs[stride*4*i]; Chris@49: Chris@49: tmp[7] = tmp[1] + tmp[4]; Chris@49: tmp[8] = tmp[2] + tmp[3]; Chris@49: tmp[9] = tmp[2] - tmp[3]; Chris@49: tmp[10] = tmp[1] - tmp[4]; Chris@49: Chris@49: (*Y0) += tmp[7]; Chris@49: (*Y0) += tmp[8]; Chris@49: Chris@49: tmp[5] = tmp[0] + cx_type( ( (tmp[7].real() * a_real) + (tmp[8].real() * b_real) ), ( (tmp[7].imag() * a_real) + (tmp[8].imag() * b_real) ) ); Chris@49: Chris@49: tmp[6] = cx_type( ( (tmp[10].imag() * a_imag) + (tmp[9].imag() * b_imag) ), ( -(tmp[10].real() * a_imag) - (tmp[9].real() * b_imag) ) ); Chris@49: Chris@49: (*Y1) = tmp[5] - tmp[6]; Chris@49: (*Y4) = tmp[5] + tmp[6]; Chris@49: Chris@49: tmp[11] = tmp[0] + cx_type( ( (tmp[7].real() * b_real) + (tmp[8].real() * a_real) ), ( (tmp[7].imag() * b_real) + (tmp[8].imag() * a_real) ) ); Chris@49: Chris@49: tmp[12] = cx_type( ( -(tmp[10].imag() * b_imag) + (tmp[9].imag() * a_imag) ), ( (tmp[10].real() * b_imag) - (tmp[9].real() * a_imag) ) ); Chris@49: Chris@49: (*Y2) = tmp[11] + tmp[12]; Chris@49: (*Y3) = tmp[11] - tmp[12]; Chris@49: Chris@49: Y0++; Chris@49: Y1++; Chris@49: Y2++; Chris@49: Y3++; Chris@49: Y4++; Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: butterfly_N(cx_type* Y, const uword stride, const uword m, const uword r) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: const cx_type* coeffs = coeffs_ptr(); Chris@49: Chris@49: tmp_array.set_min_size(r); Chris@49: cx_type* tmp = tmp_array.memptr(); Chris@49: Chris@49: for(uword u=0; u < m; ++u) Chris@49: { Chris@49: uword k = u; Chris@49: Chris@49: for(uword v=0; v < r; ++v) Chris@49: { Chris@49: tmp[v] = Y[k]; Chris@49: k += m; Chris@49: } Chris@49: Chris@49: k = u; Chris@49: Chris@49: for(uword v=0; v < r; ++v) Chris@49: { Chris@49: Y[k] = tmp[0]; Chris@49: Chris@49: uword j = 0; Chris@49: Chris@49: for(uword w=1; w < r; ++w) Chris@49: { Chris@49: j += stride * k; Chris@49: Chris@49: if(j >= N) { j -= N; } Chris@49: Chris@49: Y[k] += tmp[w] * coeffs[j]; Chris@49: } Chris@49: Chris@49: k += m; Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: inline Chris@49: void Chris@49: run(cx_type* Y, const cx_type* X, const uword stage = 0, const uword stride = 1) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: const uword m = residue[stage]; Chris@49: const uword r = radix[stage]; Chris@49: Chris@49: const cx_type *Y_end = Y + r*m; Chris@49: Chris@49: if(m == 1) Chris@49: { Chris@49: for(cx_type* Yi = Y; Yi != Y_end; Yi++, X += stride) { (*Yi) = (*X); } Chris@49: } Chris@49: else Chris@49: { Chris@49: const uword next_stage = stage + 1; Chris@49: const uword next_stride = stride * r; Chris@49: Chris@49: for(cx_type* Yi = Y; Yi != Y_end; Yi += m, X += stride) { run(Yi, X, next_stage, next_stride); } Chris@49: } Chris@49: Chris@49: switch(r) Chris@49: { Chris@49: case 2: butterfly_2(Y, stride, m ); break; Chris@49: case 3: butterfly_3(Y, stride, m ); break; Chris@49: case 4: butterfly_4(Y, stride, m ); break; Chris@49: case 5: butterfly_5(Y, stride, m ); break; Chris@49: default: butterfly_N(Y, stride, m, r); break; Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: };