Chris@49
|
1 // This Source Code Form is a compilation of:
|
Chris@49
|
2 // (1) source code written by Conrad Sanderson, and
|
Chris@49
|
3 // (2) a modified form of source code referred to as "kissfft.hh".
|
Chris@49
|
4 //
|
Chris@49
|
5 // This compilation is Copyright (C) 2013 Conrad Sanderson
|
Chris@49
|
6 // and is subject to the terms of the Mozilla Public License, v. 2.0.
|
Chris@49
|
7 //
|
Chris@49
|
8 // The source code that is distinct and separate from "kissfft.hh"
|
Chris@49
|
9 // is Copyright (C) 2013 Conrad Sanderson and is subject to the
|
Chris@49
|
10 // terms of the Mozilla Public License, v. 2.0.
|
Chris@49
|
11 //
|
Chris@49
|
12 // If a copy of the MPL was not distributed with this file,
|
Chris@49
|
13 // You can obtain one at http://mozilla.org/MPL/2.0/.
|
Chris@49
|
14 //
|
Chris@49
|
15 // The original "kissfft.hh" source code is licensed under a 3-clause BSD license,
|
Chris@49
|
16 // as follows:
|
Chris@49
|
17 //
|
Chris@49
|
18 // Copyright (c) 2003-2010 Mark Borgerding
|
Chris@49
|
19 //
|
Chris@49
|
20 // All rights reserved.
|
Chris@49
|
21 //
|
Chris@49
|
22 // Redistribution and use in source and binary forms, with or without modification,
|
Chris@49
|
23 // are permitted provided that the following conditions are met:
|
Chris@49
|
24 //
|
Chris@49
|
25 // * Redistributions of source code must retain the above copyright notice,
|
Chris@49
|
26 // this list of conditions and the following disclaimer.
|
Chris@49
|
27 //
|
Chris@49
|
28 // * Redistributions in binary form must reproduce the above copyright notice,
|
Chris@49
|
29 // this list of conditions and the following disclaimer in the documentation
|
Chris@49
|
30 // and/or other materials provided with the distribution.
|
Chris@49
|
31 //
|
Chris@49
|
32 // * Neither the author nor the names of any contributors may be used to endorse or promote
|
Chris@49
|
33 // products derived from this software without specific prior written permission.
|
Chris@49
|
34 //
|
Chris@49
|
35 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
Chris@49
|
36 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
Chris@49
|
37 // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
Chris@49
|
38 // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
Chris@49
|
39 // OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
Chris@49
|
40 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
Chris@49
|
41 // ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
Chris@49
|
42 // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
Chris@49
|
43
|
Chris@49
|
44
|
Chris@49
|
45
|
Chris@49
|
46 template<typename cx_type, uword fixed_N, bool> struct store {};
|
Chris@49
|
47
|
Chris@49
|
48 template<typename cx_type, uword fixed_N>
|
Chris@49
|
49 struct store<cx_type, fixed_N, true>
|
Chris@49
|
50 {
|
Chris@49
|
51 static const uword N = fixed_N;
|
Chris@49
|
52
|
Chris@49
|
53 arma_aligned cx_type coeffs_array[fixed_N];
|
Chris@49
|
54
|
Chris@49
|
55 inline store() {}
|
Chris@49
|
56 inline store(uword) {}
|
Chris@49
|
57
|
Chris@49
|
58 arma_inline cx_type* coeffs_ptr() { return &coeffs_array[0]; }
|
Chris@49
|
59 arma_inline const cx_type* coeffs_ptr() const { return &coeffs_array[0]; }
|
Chris@49
|
60 };
|
Chris@49
|
61
|
Chris@49
|
62
|
Chris@49
|
63
|
Chris@49
|
64 template<typename cx_type, uword fixed_N>
|
Chris@49
|
65 struct store<cx_type, fixed_N, false>
|
Chris@49
|
66 {
|
Chris@49
|
67 const uword N;
|
Chris@49
|
68
|
Chris@49
|
69 podarray<cx_type> coeffs_array;
|
Chris@49
|
70
|
Chris@49
|
71 inline store() : N(0) {}
|
Chris@49
|
72 inline store(uword in_N) : N(in_N) { coeffs_array.set_size(N); }
|
Chris@49
|
73
|
Chris@49
|
74 arma_inline cx_type* coeffs_ptr() { return coeffs_array.memptr(); }
|
Chris@49
|
75 arma_inline const cx_type* coeffs_ptr() const { return coeffs_array.memptr(); }
|
Chris@49
|
76 };
|
Chris@49
|
77
|
Chris@49
|
78
|
Chris@49
|
79
|
Chris@49
|
80 template<typename cx_type, bool inverse, uword fixed_N = 0>
|
Chris@49
|
81 class fft_engine : public store<cx_type, fixed_N, (fixed_N > 0)>
|
Chris@49
|
82 {
|
Chris@49
|
83 public:
|
Chris@49
|
84
|
Chris@49
|
85 typedef typename get_pod_type<cx_type>::result T;
|
Chris@49
|
86
|
Chris@49
|
87 using store<cx_type, fixed_N, (fixed_N > 0)>::N;
|
Chris@49
|
88 using store<cx_type, fixed_N, (fixed_N > 0)>::coeffs_ptr;
|
Chris@49
|
89
|
Chris@49
|
90 podarray<uword> residue;
|
Chris@49
|
91 podarray<uword> radix;
|
Chris@49
|
92
|
Chris@49
|
93 podarray<cx_type> tmp_array;
|
Chris@49
|
94
|
Chris@49
|
95
|
Chris@49
|
96 template<bool fill>
|
Chris@49
|
97 inline
|
Chris@49
|
98 uword
|
Chris@49
|
99 calc_radix()
|
Chris@49
|
100 {
|
Chris@49
|
101 uword i = 0;
|
Chris@49
|
102
|
Chris@49
|
103 for(uword n = N, r=4; n >= 2; ++i)
|
Chris@49
|
104 {
|
Chris@49
|
105 while( (n % r) > 0 )
|
Chris@49
|
106 {
|
Chris@49
|
107 switch(r)
|
Chris@49
|
108 {
|
Chris@49
|
109 case 2: r = 3; break;
|
Chris@49
|
110 case 4: r = 2; break;
|
Chris@49
|
111 default: r += 2; break;
|
Chris@49
|
112 }
|
Chris@49
|
113
|
Chris@49
|
114 if(r*r > n) { r = n; }
|
Chris@49
|
115 }
|
Chris@49
|
116
|
Chris@49
|
117 n /= r;
|
Chris@49
|
118
|
Chris@49
|
119 if(fill)
|
Chris@49
|
120 {
|
Chris@49
|
121 residue[i] = n;
|
Chris@49
|
122 radix[i] = r;
|
Chris@49
|
123 }
|
Chris@49
|
124 }
|
Chris@49
|
125
|
Chris@49
|
126 return i;
|
Chris@49
|
127 }
|
Chris@49
|
128
|
Chris@49
|
129
|
Chris@49
|
130
|
Chris@49
|
131 inline
|
Chris@49
|
132 fft_engine(const uword in_N)
|
Chris@49
|
133 : store< cx_type, fixed_N, (fixed_N > 0) >(in_N)
|
Chris@49
|
134 {
|
Chris@49
|
135 arma_extra_debug_sigprint();
|
Chris@49
|
136
|
Chris@49
|
137 const uword len = calc_radix<false>();
|
Chris@49
|
138
|
Chris@49
|
139 residue.set_size(len);
|
Chris@49
|
140 radix.set_size(len);
|
Chris@49
|
141
|
Chris@49
|
142 calc_radix<true>();
|
Chris@49
|
143
|
Chris@49
|
144
|
Chris@49
|
145 // calculate the constant coefficients
|
Chris@49
|
146
|
Chris@49
|
147 cx_type* coeffs = coeffs_ptr();
|
Chris@49
|
148
|
Chris@49
|
149 const T k = T( (inverse) ? +2 : -2 ) * std::acos( T(-1) ) / T(N);
|
Chris@49
|
150
|
Chris@49
|
151 for(uword i=0; i < N; ++i) { coeffs[i] = std::exp( cx_type(T(0), i*k) ); }
|
Chris@49
|
152 }
|
Chris@49
|
153
|
Chris@49
|
154
|
Chris@49
|
155
|
Chris@49
|
156 arma_hot
|
Chris@49
|
157 inline
|
Chris@49
|
158 void
|
Chris@49
|
159 butterfly_2(cx_type* Y, const uword stride, const uword m)
|
Chris@49
|
160 {
|
Chris@49
|
161 arma_extra_debug_sigprint();
|
Chris@49
|
162
|
Chris@49
|
163 const cx_type* coeffs = coeffs_ptr();
|
Chris@49
|
164
|
Chris@49
|
165 for(uword i=0; i < m; ++i)
|
Chris@49
|
166 {
|
Chris@49
|
167 const cx_type t = Y[i+m] * coeffs[i*stride];
|
Chris@49
|
168
|
Chris@49
|
169 Y[i+m] = Y[i] - t;
|
Chris@49
|
170 Y[i ] += t;
|
Chris@49
|
171 }
|
Chris@49
|
172 }
|
Chris@49
|
173
|
Chris@49
|
174
|
Chris@49
|
175
|
Chris@49
|
176 arma_hot
|
Chris@49
|
177 inline
|
Chris@49
|
178 void
|
Chris@49
|
179 butterfly_3(cx_type* Y, const uword stride, const uword m)
|
Chris@49
|
180 {
|
Chris@49
|
181 arma_extra_debug_sigprint();
|
Chris@49
|
182
|
Chris@49
|
183 arma_aligned cx_type tmp[5];
|
Chris@49
|
184
|
Chris@49
|
185 cx_type* coeffs1 = coeffs_ptr();
|
Chris@49
|
186 cx_type* coeffs2 = coeffs1;
|
Chris@49
|
187
|
Chris@49
|
188 const T coeff_sm_imag = coeffs1[stride*m].imag();
|
Chris@49
|
189
|
Chris@49
|
190 const uword n = m*2;
|
Chris@49
|
191
|
Chris@49
|
192 // TODO: rearrange the indices within tmp[] into a more sane order
|
Chris@49
|
193
|
Chris@49
|
194 for(uword i = m; i > 0; --i)
|
Chris@49
|
195 {
|
Chris@49
|
196 tmp[1] = Y[m] * (*coeffs1);
|
Chris@49
|
197 tmp[2] = Y[n] * (*coeffs2);
|
Chris@49
|
198
|
Chris@49
|
199 tmp[0] = tmp[1] - tmp[2];
|
Chris@49
|
200 tmp[0] *= coeff_sm_imag;
|
Chris@49
|
201
|
Chris@49
|
202 tmp[3] = tmp[1] + tmp[2];
|
Chris@49
|
203
|
Chris@49
|
204 Y[m] = cx_type( (Y[0].real() - (0.5*tmp[3].real())), (Y[0].imag() - (0.5*tmp[3].imag())) );
|
Chris@49
|
205
|
Chris@49
|
206 Y[0] += tmp[3];
|
Chris@49
|
207
|
Chris@49
|
208
|
Chris@49
|
209 Y[n] = cx_type( (Y[m].real() + tmp[0].imag()), (Y[m].imag() - tmp[0].real()) );
|
Chris@49
|
210
|
Chris@49
|
211 Y[m] += cx_type( -tmp[0].imag(), tmp[0].real() );
|
Chris@49
|
212
|
Chris@49
|
213 Y++;
|
Chris@49
|
214
|
Chris@49
|
215 coeffs1 += stride;
|
Chris@49
|
216 coeffs2 += stride*2;
|
Chris@49
|
217 }
|
Chris@49
|
218 }
|
Chris@49
|
219
|
Chris@49
|
220
|
Chris@49
|
221
|
Chris@49
|
222 arma_hot
|
Chris@49
|
223 inline
|
Chris@49
|
224 void
|
Chris@49
|
225 butterfly_4(cx_type* Y, const uword stride, const uword m)
|
Chris@49
|
226 {
|
Chris@49
|
227 arma_extra_debug_sigprint();
|
Chris@49
|
228
|
Chris@49
|
229 arma_aligned cx_type tmp[7];
|
Chris@49
|
230
|
Chris@49
|
231 const cx_type* coeffs = coeffs_ptr();
|
Chris@49
|
232
|
Chris@49
|
233 const uword m2 = m*2;
|
Chris@49
|
234 const uword m3 = m*3;
|
Chris@49
|
235
|
Chris@49
|
236 // TODO: rearrange the indices within tmp[] into a more sane order
|
Chris@49
|
237
|
Chris@49
|
238 for(uword i=0; i < m; ++i)
|
Chris@49
|
239 {
|
Chris@49
|
240 tmp[0] = Y[i + m ] * coeffs[i*stride ];
|
Chris@49
|
241 tmp[2] = Y[i + m3] * coeffs[i*stride*3];
|
Chris@49
|
242 tmp[3] = tmp[0] + tmp[2];
|
Chris@49
|
243
|
Chris@49
|
244 //tmp[4] = tmp[0] - tmp[2];
|
Chris@49
|
245 //tmp[4] = (inverse) ? cx_type( -(tmp[4].imag()), tmp[4].real() ) : cx_type( tmp[4].imag(), -tmp[4].real() );
|
Chris@49
|
246
|
Chris@49
|
247 tmp[4] = (inverse)
|
Chris@49
|
248 ? cx_type( (tmp[2].imag() - tmp[0].imag()), (tmp[0].real() - tmp[2].real()) )
|
Chris@49
|
249 : cx_type( (tmp[0].imag() - tmp[2].imag()), (tmp[2].real() - tmp[0].real()) );
|
Chris@49
|
250
|
Chris@49
|
251 tmp[1] = Y[i + m2] * coeffs[i*stride*2];
|
Chris@49
|
252 tmp[5] = Y[i] - tmp[1];
|
Chris@49
|
253
|
Chris@49
|
254
|
Chris@49
|
255 Y[i ] += tmp[1];
|
Chris@49
|
256 Y[i + m2] = Y[i] - tmp[3];
|
Chris@49
|
257 Y[i ] += tmp[3];
|
Chris@49
|
258 Y[i + m ] = tmp[5] + tmp[4];
|
Chris@49
|
259 Y[i + m3] = tmp[5] - tmp[4];
|
Chris@49
|
260 }
|
Chris@49
|
261 }
|
Chris@49
|
262
|
Chris@49
|
263
|
Chris@49
|
264
|
Chris@49
|
265 inline
|
Chris@49
|
266 arma_hot
|
Chris@49
|
267 void
|
Chris@49
|
268 butterfly_5(cx_type* Y, const uword stride, const uword m)
|
Chris@49
|
269 {
|
Chris@49
|
270 arma_extra_debug_sigprint();
|
Chris@49
|
271
|
Chris@49
|
272 arma_aligned cx_type tmp[13];
|
Chris@49
|
273
|
Chris@49
|
274 const cx_type* coeffs = coeffs_ptr();
|
Chris@49
|
275
|
Chris@49
|
276 const T a_real = coeffs[stride*1*m].real();
|
Chris@49
|
277 const T a_imag = coeffs[stride*1*m].imag();
|
Chris@49
|
278
|
Chris@49
|
279 const T b_real = coeffs[stride*2*m].real();
|
Chris@49
|
280 const T b_imag = coeffs[stride*2*m].imag();
|
Chris@49
|
281
|
Chris@49
|
282 cx_type* Y0 = Y;
|
Chris@49
|
283 cx_type* Y1 = Y + 1*m;
|
Chris@49
|
284 cx_type* Y2 = Y + 2*m;
|
Chris@49
|
285 cx_type* Y3 = Y + 3*m;
|
Chris@49
|
286 cx_type* Y4 = Y + 4*m;
|
Chris@49
|
287
|
Chris@49
|
288 for(uword i=0; i < m; ++i)
|
Chris@49
|
289 {
|
Chris@49
|
290 tmp[0] = (*Y0);
|
Chris@49
|
291
|
Chris@49
|
292 tmp[1] = (*Y1) * coeffs[stride*1*i];
|
Chris@49
|
293 tmp[2] = (*Y2) * coeffs[stride*2*i];
|
Chris@49
|
294 tmp[3] = (*Y3) * coeffs[stride*3*i];
|
Chris@49
|
295 tmp[4] = (*Y4) * coeffs[stride*4*i];
|
Chris@49
|
296
|
Chris@49
|
297 tmp[7] = tmp[1] + tmp[4];
|
Chris@49
|
298 tmp[8] = tmp[2] + tmp[3];
|
Chris@49
|
299 tmp[9] = tmp[2] - tmp[3];
|
Chris@49
|
300 tmp[10] = tmp[1] - tmp[4];
|
Chris@49
|
301
|
Chris@49
|
302 (*Y0) += tmp[7];
|
Chris@49
|
303 (*Y0) += tmp[8];
|
Chris@49
|
304
|
Chris@49
|
305 tmp[5] = tmp[0] + cx_type( ( (tmp[7].real() * a_real) + (tmp[8].real() * b_real) ), ( (tmp[7].imag() * a_real) + (tmp[8].imag() * b_real) ) );
|
Chris@49
|
306
|
Chris@49
|
307 tmp[6] = cx_type( ( (tmp[10].imag() * a_imag) + (tmp[9].imag() * b_imag) ), ( -(tmp[10].real() * a_imag) - (tmp[9].real() * b_imag) ) );
|
Chris@49
|
308
|
Chris@49
|
309 (*Y1) = tmp[5] - tmp[6];
|
Chris@49
|
310 (*Y4) = tmp[5] + tmp[6];
|
Chris@49
|
311
|
Chris@49
|
312 tmp[11] = tmp[0] + cx_type( ( (tmp[7].real() * b_real) + (tmp[8].real() * a_real) ), ( (tmp[7].imag() * b_real) + (tmp[8].imag() * a_real) ) );
|
Chris@49
|
313
|
Chris@49
|
314 tmp[12] = cx_type( ( -(tmp[10].imag() * b_imag) + (tmp[9].imag() * a_imag) ), ( (tmp[10].real() * b_imag) - (tmp[9].real() * a_imag) ) );
|
Chris@49
|
315
|
Chris@49
|
316 (*Y2) = tmp[11] + tmp[12];
|
Chris@49
|
317 (*Y3) = tmp[11] - tmp[12];
|
Chris@49
|
318
|
Chris@49
|
319 Y0++;
|
Chris@49
|
320 Y1++;
|
Chris@49
|
321 Y2++;
|
Chris@49
|
322 Y3++;
|
Chris@49
|
323 Y4++;
|
Chris@49
|
324 }
|
Chris@49
|
325 }
|
Chris@49
|
326
|
Chris@49
|
327
|
Chris@49
|
328
|
Chris@49
|
329 arma_hot
|
Chris@49
|
330 inline
|
Chris@49
|
331 void
|
Chris@49
|
332 butterfly_N(cx_type* Y, const uword stride, const uword m, const uword r)
|
Chris@49
|
333 {
|
Chris@49
|
334 arma_extra_debug_sigprint();
|
Chris@49
|
335
|
Chris@49
|
336 const cx_type* coeffs = coeffs_ptr();
|
Chris@49
|
337
|
Chris@49
|
338 tmp_array.set_min_size(r);
|
Chris@49
|
339 cx_type* tmp = tmp_array.memptr();
|
Chris@49
|
340
|
Chris@49
|
341 for(uword u=0; u < m; ++u)
|
Chris@49
|
342 {
|
Chris@49
|
343 uword k = u;
|
Chris@49
|
344
|
Chris@49
|
345 for(uword v=0; v < r; ++v)
|
Chris@49
|
346 {
|
Chris@49
|
347 tmp[v] = Y[k];
|
Chris@49
|
348 k += m;
|
Chris@49
|
349 }
|
Chris@49
|
350
|
Chris@49
|
351 k = u;
|
Chris@49
|
352
|
Chris@49
|
353 for(uword v=0; v < r; ++v)
|
Chris@49
|
354 {
|
Chris@49
|
355 Y[k] = tmp[0];
|
Chris@49
|
356
|
Chris@49
|
357 uword j = 0;
|
Chris@49
|
358
|
Chris@49
|
359 for(uword w=1; w < r; ++w)
|
Chris@49
|
360 {
|
Chris@49
|
361 j += stride * k;
|
Chris@49
|
362
|
Chris@49
|
363 if(j >= N) { j -= N; }
|
Chris@49
|
364
|
Chris@49
|
365 Y[k] += tmp[w] * coeffs[j];
|
Chris@49
|
366 }
|
Chris@49
|
367
|
Chris@49
|
368 k += m;
|
Chris@49
|
369 }
|
Chris@49
|
370 }
|
Chris@49
|
371 }
|
Chris@49
|
372
|
Chris@49
|
373
|
Chris@49
|
374
|
Chris@49
|
375 inline
|
Chris@49
|
376 void
|
Chris@49
|
377 run(cx_type* Y, const cx_type* X, const uword stage = 0, const uword stride = 1)
|
Chris@49
|
378 {
|
Chris@49
|
379 arma_extra_debug_sigprint();
|
Chris@49
|
380
|
Chris@49
|
381 const uword m = residue[stage];
|
Chris@49
|
382 const uword r = radix[stage];
|
Chris@49
|
383
|
Chris@49
|
384 const cx_type *Y_end = Y + r*m;
|
Chris@49
|
385
|
Chris@49
|
386 if(m == 1)
|
Chris@49
|
387 {
|
Chris@49
|
388 for(cx_type* Yi = Y; Yi != Y_end; Yi++, X += stride) { (*Yi) = (*X); }
|
Chris@49
|
389 }
|
Chris@49
|
390 else
|
Chris@49
|
391 {
|
Chris@49
|
392 const uword next_stage = stage + 1;
|
Chris@49
|
393 const uword next_stride = stride * r;
|
Chris@49
|
394
|
Chris@49
|
395 for(cx_type* Yi = Y; Yi != Y_end; Yi += m, X += stride) { run(Yi, X, next_stage, next_stride); }
|
Chris@49
|
396 }
|
Chris@49
|
397
|
Chris@49
|
398 switch(r)
|
Chris@49
|
399 {
|
Chris@49
|
400 case 2: butterfly_2(Y, stride, m ); break;
|
Chris@49
|
401 case 3: butterfly_3(Y, stride, m ); break;
|
Chris@49
|
402 case 4: butterfly_4(Y, stride, m ); break;
|
Chris@49
|
403 case 5: butterfly_5(Y, stride, m ); break;
|
Chris@49
|
404 default: butterfly_N(Y, stride, m, r); break;
|
Chris@49
|
405 }
|
Chris@49
|
406 }
|
Chris@49
|
407
|
Chris@49
|
408
|
Chris@49
|
409 };
|