annotate armadillo-3.900.4/include/armadillo_bits/fft_engine.hpp @ 84:55a047986812 tip

Update library URI so as not to be document-local
author Chris Cannam
date Wed, 22 Apr 2020 14:21:57 +0100
parents 1ec0e2823891
children
rev   line source
Chris@49 1 // This Source Code Form is a compilation of:
Chris@49 2 // (1) source code written by Conrad Sanderson, and
Chris@49 3 // (2) a modified form of source code referred to as "kissfft.hh".
Chris@49 4 //
Chris@49 5 // This compilation is Copyright (C) 2013 Conrad Sanderson
Chris@49 6 // and is subject to the terms of the Mozilla Public License, v. 2.0.
Chris@49 7 //
Chris@49 8 // The source code that is distinct and separate from "kissfft.hh"
Chris@49 9 // is Copyright (C) 2013 Conrad Sanderson and is subject to the
Chris@49 10 // terms of the Mozilla Public License, v. 2.0.
Chris@49 11 //
Chris@49 12 // If a copy of the MPL was not distributed with this file,
Chris@49 13 // You can obtain one at http://mozilla.org/MPL/2.0/.
Chris@49 14 //
Chris@49 15 // The original "kissfft.hh" source code is licensed under a 3-clause BSD license,
Chris@49 16 // as follows:
Chris@49 17 //
Chris@49 18 // Copyright (c) 2003-2010 Mark Borgerding
Chris@49 19 //
Chris@49 20 // All rights reserved.
Chris@49 21 //
Chris@49 22 // Redistribution and use in source and binary forms, with or without modification,
Chris@49 23 // are permitted provided that the following conditions are met:
Chris@49 24 //
Chris@49 25 // * Redistributions of source code must retain the above copyright notice,
Chris@49 26 // this list of conditions and the following disclaimer.
Chris@49 27 //
Chris@49 28 // * Redistributions in binary form must reproduce the above copyright notice,
Chris@49 29 // this list of conditions and the following disclaimer in the documentation
Chris@49 30 // and/or other materials provided with the distribution.
Chris@49 31 //
Chris@49 32 // * Neither the author nor the names of any contributors may be used to endorse or promote
Chris@49 33 // products derived from this software without specific prior written permission.
Chris@49 34 //
Chris@49 35 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
Chris@49 36 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
Chris@49 37 // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
Chris@49 38 // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
Chris@49 39 // OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
Chris@49 40 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
Chris@49 41 // ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
Chris@49 42 // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@49 43
Chris@49 44
Chris@49 45
Chris@49 46 template<typename cx_type, uword fixed_N, bool> struct store {};
Chris@49 47
Chris@49 48 template<typename cx_type, uword fixed_N>
Chris@49 49 struct store<cx_type, fixed_N, true>
Chris@49 50 {
Chris@49 51 static const uword N = fixed_N;
Chris@49 52
Chris@49 53 arma_aligned cx_type coeffs_array[fixed_N];
Chris@49 54
Chris@49 55 inline store() {}
Chris@49 56 inline store(uword) {}
Chris@49 57
Chris@49 58 arma_inline cx_type* coeffs_ptr() { return &coeffs_array[0]; }
Chris@49 59 arma_inline const cx_type* coeffs_ptr() const { return &coeffs_array[0]; }
Chris@49 60 };
Chris@49 61
Chris@49 62
Chris@49 63
Chris@49 64 template<typename cx_type, uword fixed_N>
Chris@49 65 struct store<cx_type, fixed_N, false>
Chris@49 66 {
Chris@49 67 const uword N;
Chris@49 68
Chris@49 69 podarray<cx_type> coeffs_array;
Chris@49 70
Chris@49 71 inline store() : N(0) {}
Chris@49 72 inline store(uword in_N) : N(in_N) { coeffs_array.set_size(N); }
Chris@49 73
Chris@49 74 arma_inline cx_type* coeffs_ptr() { return coeffs_array.memptr(); }
Chris@49 75 arma_inline const cx_type* coeffs_ptr() const { return coeffs_array.memptr(); }
Chris@49 76 };
Chris@49 77
Chris@49 78
Chris@49 79
Chris@49 80 template<typename cx_type, bool inverse, uword fixed_N = 0>
Chris@49 81 class fft_engine : public store<cx_type, fixed_N, (fixed_N > 0)>
Chris@49 82 {
Chris@49 83 public:
Chris@49 84
Chris@49 85 typedef typename get_pod_type<cx_type>::result T;
Chris@49 86
Chris@49 87 using store<cx_type, fixed_N, (fixed_N > 0)>::N;
Chris@49 88 using store<cx_type, fixed_N, (fixed_N > 0)>::coeffs_ptr;
Chris@49 89
Chris@49 90 podarray<uword> residue;
Chris@49 91 podarray<uword> radix;
Chris@49 92
Chris@49 93 podarray<cx_type> tmp_array;
Chris@49 94
Chris@49 95
Chris@49 96 template<bool fill>
Chris@49 97 inline
Chris@49 98 uword
Chris@49 99 calc_radix()
Chris@49 100 {
Chris@49 101 uword i = 0;
Chris@49 102
Chris@49 103 for(uword n = N, r=4; n >= 2; ++i)
Chris@49 104 {
Chris@49 105 while( (n % r) > 0 )
Chris@49 106 {
Chris@49 107 switch(r)
Chris@49 108 {
Chris@49 109 case 2: r = 3; break;
Chris@49 110 case 4: r = 2; break;
Chris@49 111 default: r += 2; break;
Chris@49 112 }
Chris@49 113
Chris@49 114 if(r*r > n) { r = n; }
Chris@49 115 }
Chris@49 116
Chris@49 117 n /= r;
Chris@49 118
Chris@49 119 if(fill)
Chris@49 120 {
Chris@49 121 residue[i] = n;
Chris@49 122 radix[i] = r;
Chris@49 123 }
Chris@49 124 }
Chris@49 125
Chris@49 126 return i;
Chris@49 127 }
Chris@49 128
Chris@49 129
Chris@49 130
Chris@49 131 inline
Chris@49 132 fft_engine(const uword in_N)
Chris@49 133 : store< cx_type, fixed_N, (fixed_N > 0) >(in_N)
Chris@49 134 {
Chris@49 135 arma_extra_debug_sigprint();
Chris@49 136
Chris@49 137 const uword len = calc_radix<false>();
Chris@49 138
Chris@49 139 residue.set_size(len);
Chris@49 140 radix.set_size(len);
Chris@49 141
Chris@49 142 calc_radix<true>();
Chris@49 143
Chris@49 144
Chris@49 145 // calculate the constant coefficients
Chris@49 146
Chris@49 147 cx_type* coeffs = coeffs_ptr();
Chris@49 148
Chris@49 149 const T k = T( (inverse) ? +2 : -2 ) * std::acos( T(-1) ) / T(N);
Chris@49 150
Chris@49 151 for(uword i=0; i < N; ++i) { coeffs[i] = std::exp( cx_type(T(0), i*k) ); }
Chris@49 152 }
Chris@49 153
Chris@49 154
Chris@49 155
Chris@49 156 arma_hot
Chris@49 157 inline
Chris@49 158 void
Chris@49 159 butterfly_2(cx_type* Y, const uword stride, const uword m)
Chris@49 160 {
Chris@49 161 arma_extra_debug_sigprint();
Chris@49 162
Chris@49 163 const cx_type* coeffs = coeffs_ptr();
Chris@49 164
Chris@49 165 for(uword i=0; i < m; ++i)
Chris@49 166 {
Chris@49 167 const cx_type t = Y[i+m] * coeffs[i*stride];
Chris@49 168
Chris@49 169 Y[i+m] = Y[i] - t;
Chris@49 170 Y[i ] += t;
Chris@49 171 }
Chris@49 172 }
Chris@49 173
Chris@49 174
Chris@49 175
Chris@49 176 arma_hot
Chris@49 177 inline
Chris@49 178 void
Chris@49 179 butterfly_3(cx_type* Y, const uword stride, const uword m)
Chris@49 180 {
Chris@49 181 arma_extra_debug_sigprint();
Chris@49 182
Chris@49 183 arma_aligned cx_type tmp[5];
Chris@49 184
Chris@49 185 cx_type* coeffs1 = coeffs_ptr();
Chris@49 186 cx_type* coeffs2 = coeffs1;
Chris@49 187
Chris@49 188 const T coeff_sm_imag = coeffs1[stride*m].imag();
Chris@49 189
Chris@49 190 const uword n = m*2;
Chris@49 191
Chris@49 192 // TODO: rearrange the indices within tmp[] into a more sane order
Chris@49 193
Chris@49 194 for(uword i = m; i > 0; --i)
Chris@49 195 {
Chris@49 196 tmp[1] = Y[m] * (*coeffs1);
Chris@49 197 tmp[2] = Y[n] * (*coeffs2);
Chris@49 198
Chris@49 199 tmp[0] = tmp[1] - tmp[2];
Chris@49 200 tmp[0] *= coeff_sm_imag;
Chris@49 201
Chris@49 202 tmp[3] = tmp[1] + tmp[2];
Chris@49 203
Chris@49 204 Y[m] = cx_type( (Y[0].real() - (0.5*tmp[3].real())), (Y[0].imag() - (0.5*tmp[3].imag())) );
Chris@49 205
Chris@49 206 Y[0] += tmp[3];
Chris@49 207
Chris@49 208
Chris@49 209 Y[n] = cx_type( (Y[m].real() + tmp[0].imag()), (Y[m].imag() - tmp[0].real()) );
Chris@49 210
Chris@49 211 Y[m] += cx_type( -tmp[0].imag(), tmp[0].real() );
Chris@49 212
Chris@49 213 Y++;
Chris@49 214
Chris@49 215 coeffs1 += stride;
Chris@49 216 coeffs2 += stride*2;
Chris@49 217 }
Chris@49 218 }
Chris@49 219
Chris@49 220
Chris@49 221
Chris@49 222 arma_hot
Chris@49 223 inline
Chris@49 224 void
Chris@49 225 butterfly_4(cx_type* Y, const uword stride, const uword m)
Chris@49 226 {
Chris@49 227 arma_extra_debug_sigprint();
Chris@49 228
Chris@49 229 arma_aligned cx_type tmp[7];
Chris@49 230
Chris@49 231 const cx_type* coeffs = coeffs_ptr();
Chris@49 232
Chris@49 233 const uword m2 = m*2;
Chris@49 234 const uword m3 = m*3;
Chris@49 235
Chris@49 236 // TODO: rearrange the indices within tmp[] into a more sane order
Chris@49 237
Chris@49 238 for(uword i=0; i < m; ++i)
Chris@49 239 {
Chris@49 240 tmp[0] = Y[i + m ] * coeffs[i*stride ];
Chris@49 241 tmp[2] = Y[i + m3] * coeffs[i*stride*3];
Chris@49 242 tmp[3] = tmp[0] + tmp[2];
Chris@49 243
Chris@49 244 //tmp[4] = tmp[0] - tmp[2];
Chris@49 245 //tmp[4] = (inverse) ? cx_type( -(tmp[4].imag()), tmp[4].real() ) : cx_type( tmp[4].imag(), -tmp[4].real() );
Chris@49 246
Chris@49 247 tmp[4] = (inverse)
Chris@49 248 ? cx_type( (tmp[2].imag() - tmp[0].imag()), (tmp[0].real() - tmp[2].real()) )
Chris@49 249 : cx_type( (tmp[0].imag() - tmp[2].imag()), (tmp[2].real() - tmp[0].real()) );
Chris@49 250
Chris@49 251 tmp[1] = Y[i + m2] * coeffs[i*stride*2];
Chris@49 252 tmp[5] = Y[i] - tmp[1];
Chris@49 253
Chris@49 254
Chris@49 255 Y[i ] += tmp[1];
Chris@49 256 Y[i + m2] = Y[i] - tmp[3];
Chris@49 257 Y[i ] += tmp[3];
Chris@49 258 Y[i + m ] = tmp[5] + tmp[4];
Chris@49 259 Y[i + m3] = tmp[5] - tmp[4];
Chris@49 260 }
Chris@49 261 }
Chris@49 262
Chris@49 263
Chris@49 264
Chris@49 265 inline
Chris@49 266 arma_hot
Chris@49 267 void
Chris@49 268 butterfly_5(cx_type* Y, const uword stride, const uword m)
Chris@49 269 {
Chris@49 270 arma_extra_debug_sigprint();
Chris@49 271
Chris@49 272 arma_aligned cx_type tmp[13];
Chris@49 273
Chris@49 274 const cx_type* coeffs = coeffs_ptr();
Chris@49 275
Chris@49 276 const T a_real = coeffs[stride*1*m].real();
Chris@49 277 const T a_imag = coeffs[stride*1*m].imag();
Chris@49 278
Chris@49 279 const T b_real = coeffs[stride*2*m].real();
Chris@49 280 const T b_imag = coeffs[stride*2*m].imag();
Chris@49 281
Chris@49 282 cx_type* Y0 = Y;
Chris@49 283 cx_type* Y1 = Y + 1*m;
Chris@49 284 cx_type* Y2 = Y + 2*m;
Chris@49 285 cx_type* Y3 = Y + 3*m;
Chris@49 286 cx_type* Y4 = Y + 4*m;
Chris@49 287
Chris@49 288 for(uword i=0; i < m; ++i)
Chris@49 289 {
Chris@49 290 tmp[0] = (*Y0);
Chris@49 291
Chris@49 292 tmp[1] = (*Y1) * coeffs[stride*1*i];
Chris@49 293 tmp[2] = (*Y2) * coeffs[stride*2*i];
Chris@49 294 tmp[3] = (*Y3) * coeffs[stride*3*i];
Chris@49 295 tmp[4] = (*Y4) * coeffs[stride*4*i];
Chris@49 296
Chris@49 297 tmp[7] = tmp[1] + tmp[4];
Chris@49 298 tmp[8] = tmp[2] + tmp[3];
Chris@49 299 tmp[9] = tmp[2] - tmp[3];
Chris@49 300 tmp[10] = tmp[1] - tmp[4];
Chris@49 301
Chris@49 302 (*Y0) += tmp[7];
Chris@49 303 (*Y0) += tmp[8];
Chris@49 304
Chris@49 305 tmp[5] = tmp[0] + cx_type( ( (tmp[7].real() * a_real) + (tmp[8].real() * b_real) ), ( (tmp[7].imag() * a_real) + (tmp[8].imag() * b_real) ) );
Chris@49 306
Chris@49 307 tmp[6] = cx_type( ( (tmp[10].imag() * a_imag) + (tmp[9].imag() * b_imag) ), ( -(tmp[10].real() * a_imag) - (tmp[9].real() * b_imag) ) );
Chris@49 308
Chris@49 309 (*Y1) = tmp[5] - tmp[6];
Chris@49 310 (*Y4) = tmp[5] + tmp[6];
Chris@49 311
Chris@49 312 tmp[11] = tmp[0] + cx_type( ( (tmp[7].real() * b_real) + (tmp[8].real() * a_real) ), ( (tmp[7].imag() * b_real) + (tmp[8].imag() * a_real) ) );
Chris@49 313
Chris@49 314 tmp[12] = cx_type( ( -(tmp[10].imag() * b_imag) + (tmp[9].imag() * a_imag) ), ( (tmp[10].real() * b_imag) - (tmp[9].real() * a_imag) ) );
Chris@49 315
Chris@49 316 (*Y2) = tmp[11] + tmp[12];
Chris@49 317 (*Y3) = tmp[11] - tmp[12];
Chris@49 318
Chris@49 319 Y0++;
Chris@49 320 Y1++;
Chris@49 321 Y2++;
Chris@49 322 Y3++;
Chris@49 323 Y4++;
Chris@49 324 }
Chris@49 325 }
Chris@49 326
Chris@49 327
Chris@49 328
Chris@49 329 arma_hot
Chris@49 330 inline
Chris@49 331 void
Chris@49 332 butterfly_N(cx_type* Y, const uword stride, const uword m, const uword r)
Chris@49 333 {
Chris@49 334 arma_extra_debug_sigprint();
Chris@49 335
Chris@49 336 const cx_type* coeffs = coeffs_ptr();
Chris@49 337
Chris@49 338 tmp_array.set_min_size(r);
Chris@49 339 cx_type* tmp = tmp_array.memptr();
Chris@49 340
Chris@49 341 for(uword u=0; u < m; ++u)
Chris@49 342 {
Chris@49 343 uword k = u;
Chris@49 344
Chris@49 345 for(uword v=0; v < r; ++v)
Chris@49 346 {
Chris@49 347 tmp[v] = Y[k];
Chris@49 348 k += m;
Chris@49 349 }
Chris@49 350
Chris@49 351 k = u;
Chris@49 352
Chris@49 353 for(uword v=0; v < r; ++v)
Chris@49 354 {
Chris@49 355 Y[k] = tmp[0];
Chris@49 356
Chris@49 357 uword j = 0;
Chris@49 358
Chris@49 359 for(uword w=1; w < r; ++w)
Chris@49 360 {
Chris@49 361 j += stride * k;
Chris@49 362
Chris@49 363 if(j >= N) { j -= N; }
Chris@49 364
Chris@49 365 Y[k] += tmp[w] * coeffs[j];
Chris@49 366 }
Chris@49 367
Chris@49 368 k += m;
Chris@49 369 }
Chris@49 370 }
Chris@49 371 }
Chris@49 372
Chris@49 373
Chris@49 374
Chris@49 375 inline
Chris@49 376 void
Chris@49 377 run(cx_type* Y, const cx_type* X, const uword stage = 0, const uword stride = 1)
Chris@49 378 {
Chris@49 379 arma_extra_debug_sigprint();
Chris@49 380
Chris@49 381 const uword m = residue[stage];
Chris@49 382 const uword r = radix[stage];
Chris@49 383
Chris@49 384 const cx_type *Y_end = Y + r*m;
Chris@49 385
Chris@49 386 if(m == 1)
Chris@49 387 {
Chris@49 388 for(cx_type* Yi = Y; Yi != Y_end; Yi++, X += stride) { (*Yi) = (*X); }
Chris@49 389 }
Chris@49 390 else
Chris@49 391 {
Chris@49 392 const uword next_stage = stage + 1;
Chris@49 393 const uword next_stride = stride * r;
Chris@49 394
Chris@49 395 for(cx_type* Yi = Y; Yi != Y_end; Yi += m, X += stride) { run(Yi, X, next_stage, next_stride); }
Chris@49 396 }
Chris@49 397
Chris@49 398 switch(r)
Chris@49 399 {
Chris@49 400 case 2: butterfly_2(Y, stride, m ); break;
Chris@49 401 case 3: butterfly_3(Y, stride, m ); break;
Chris@49 402 case 4: butterfly_4(Y, stride, m ); break;
Chris@49 403 case 5: butterfly_5(Y, stride, m ); break;
Chris@49 404 default: butterfly_N(Y, stride, m, r); break;
Chris@49 405 }
Chris@49 406 }
Chris@49 407
Chris@49 408
Chris@49 409 };