comparison src/vamp-sdk/ext/vamp_kiss_fft.c @ 501:90571dcc371a vamp-kiss-naming

Extensively rename things in the KissFFT headers to use a Vamp prefix. The motivation is not to change anything about the Vamp SDK library builds, but to avoid confusion in case any other code (for example that pulls in the Vamp SDK as part of a wider project definition) accidentally includes these headers instead of, or as well as, some other copy of KissFFT.
author Chris Cannam
date Tue, 30 Jan 2018 09:56:46 +0000
parents src/vamp-sdk/ext/kiss_fft.c@af4a1522ef15
children
comparison
equal deleted inserted replaced
500:4a86f866bb6b 501:90571dcc371a
1 /*
2 Copyright (c) 2003-2010, Mark Borgerding
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14
15
16 #include "vamp_kiss_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
19 */
20
21 static void kf_bfly2(
22 vamp_kiss_fft_cpx * Fout,
23 const size_t fstride,
24 const vamp_kiss_fft_cfg st,
25 int m
26 )
27 {
28 vamp_kiss_fft_cpx * Fout2;
29 vamp_kiss_fft_cpx * tw1 = st->twiddles;
30 vamp_kiss_fft_cpx t;
31 Fout2 = Fout + m;
32 do{
33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
34
35 C_MUL (t, *Fout2 , *tw1);
36 tw1 += fstride;
37 C_SUB( *Fout2 , *Fout , t );
38 C_ADDTO( *Fout , t );
39 ++Fout2;
40 ++Fout;
41 }while (--m);
42 }
43
44 static void kf_bfly4(
45 vamp_kiss_fft_cpx * Fout,
46 const size_t fstride,
47 const vamp_kiss_fft_cfg st,
48 const size_t m
49 )
50 {
51 vamp_kiss_fft_cpx *tw1,*tw2,*tw3;
52 vamp_kiss_fft_cpx scratch[6];
53 size_t k=m;
54 const size_t m2=2*m;
55 const size_t m3=3*m;
56
57
58 tw3 = tw2 = tw1 = st->twiddles;
59
60 do {
61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
62
63 C_MUL(scratch[0],Fout[m] , *tw1 );
64 C_MUL(scratch[1],Fout[m2] , *tw2 );
65 C_MUL(scratch[2],Fout[m3] , *tw3 );
66
67 C_SUB( scratch[5] , *Fout, scratch[1] );
68 C_ADDTO(*Fout, scratch[1]);
69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
71 C_SUB( Fout[m2], *Fout, scratch[3] );
72 tw1 += fstride;
73 tw2 += fstride*2;
74 tw3 += fstride*3;
75 C_ADDTO( *Fout , scratch[3] );
76
77 if(st->inverse) {
78 Fout[m].r = scratch[5].r - scratch[4].i;
79 Fout[m].i = scratch[5].i + scratch[4].r;
80 Fout[m3].r = scratch[5].r + scratch[4].i;
81 Fout[m3].i = scratch[5].i - scratch[4].r;
82 }else{
83 Fout[m].r = scratch[5].r + scratch[4].i;
84 Fout[m].i = scratch[5].i - scratch[4].r;
85 Fout[m3].r = scratch[5].r - scratch[4].i;
86 Fout[m3].i = scratch[5].i + scratch[4].r;
87 }
88 ++Fout;
89 }while(--k);
90 }
91
92 static void kf_bfly3(
93 vamp_kiss_fft_cpx * Fout,
94 const size_t fstride,
95 const vamp_kiss_fft_cfg st,
96 size_t m
97 )
98 {
99 size_t k=m;
100 const size_t m2 = 2*m;
101 vamp_kiss_fft_cpx *tw1,*tw2;
102 vamp_kiss_fft_cpx scratch[5];
103 vamp_kiss_fft_cpx epi3;
104 epi3 = st->twiddles[fstride*m];
105
106 tw1=tw2=st->twiddles;
107
108 do{
109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
110
111 C_MUL(scratch[1],Fout[m] , *tw1);
112 C_MUL(scratch[2],Fout[m2] , *tw2);
113
114 C_ADD(scratch[3],scratch[1],scratch[2]);
115 C_SUB(scratch[0],scratch[1],scratch[2]);
116 tw1 += fstride;
117 tw2 += fstride*2;
118
119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
121
122 C_MULBYSCALAR( scratch[0] , epi3.i );
123
124 C_ADDTO(*Fout,scratch[3]);
125
126 Fout[m2].r = Fout[m].r + scratch[0].i;
127 Fout[m2].i = Fout[m].i - scratch[0].r;
128
129 Fout[m].r -= scratch[0].i;
130 Fout[m].i += scratch[0].r;
131
132 ++Fout;
133 }while(--k);
134 }
135
136 static void kf_bfly5(
137 vamp_kiss_fft_cpx * Fout,
138 const size_t fstride,
139 const vamp_kiss_fft_cfg st,
140 int m
141 )
142 {
143 vamp_kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
144 int u;
145 vamp_kiss_fft_cpx scratch[13];
146 vamp_kiss_fft_cpx * twiddles = st->twiddles;
147 vamp_kiss_fft_cpx *tw;
148 vamp_kiss_fft_cpx ya,yb;
149 ya = twiddles[fstride*m];
150 yb = twiddles[fstride*2*m];
151
152 Fout0=Fout;
153 Fout1=Fout0+m;
154 Fout2=Fout0+2*m;
155 Fout3=Fout0+3*m;
156 Fout4=Fout0+4*m;
157
158 tw=st->twiddles;
159 for ( u=0; u<m; ++u ) {
160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
161 scratch[0] = *Fout0;
162
163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
167
168 C_ADD( scratch[7],scratch[1],scratch[4]);
169 C_SUB( scratch[10],scratch[1],scratch[4]);
170 C_ADD( scratch[8],scratch[2],scratch[3]);
171 C_SUB( scratch[9],scratch[2],scratch[3]);
172
173 Fout0->r += scratch[7].r + scratch[8].r;
174 Fout0->i += scratch[7].i + scratch[8].i;
175
176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
178
179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
181
182 C_SUB(*Fout1,scratch[5],scratch[6]);
183 C_ADD(*Fout4,scratch[5],scratch[6]);
184
185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
189
190 C_ADD(*Fout2,scratch[11],scratch[12]);
191 C_SUB(*Fout3,scratch[11],scratch[12]);
192
193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
194 }
195 }
196
197 /* perform the butterfly for one stage of a mixed radix FFT */
198 static void kf_bfly_generic(
199 vamp_kiss_fft_cpx * Fout,
200 const size_t fstride,
201 const vamp_kiss_fft_cfg st,
202 int m,
203 int p
204 )
205 {
206 int u,k,q1,q;
207 vamp_kiss_fft_cpx * twiddles = st->twiddles;
208 vamp_kiss_fft_cpx t;
209 int Norig = st->nfft;
210
211 vamp_kiss_fft_cpx * scratch = (vamp_kiss_fft_cpx*)VAMP_KISS_FFT_TMP_ALLOC(sizeof(vamp_kiss_fft_cpx)*p);
212
213 for ( u=0; u<m; ++u ) {
214 k=u;
215 for ( q1=0 ; q1<p ; ++q1 ) {
216 scratch[q1] = Fout[ k ];
217 C_FIXDIV(scratch[q1],p);
218 k += m;
219 }
220
221 k=u;
222 for ( q1=0 ; q1<p ; ++q1 ) {
223 int twidx=0;
224 Fout[ k ] = scratch[0];
225 for (q=1;q<p;++q ) {
226 twidx += int(fstride * k);
227 if (twidx>=Norig) twidx-=Norig;
228 C_MUL(t,scratch[q] , twiddles[twidx] );
229 C_ADDTO( Fout[ k ] ,t);
230 }
231 k += m;
232 }
233 }
234 VAMP_KISS_FFT_TMP_FREE(scratch);
235 }
236
237 static
238 void kf_work(
239 vamp_kiss_fft_cpx * Fout,
240 const vamp_kiss_fft_cpx * f,
241 const size_t fstride,
242 int in_stride,
243 int * factors,
244 const vamp_kiss_fft_cfg st
245 )
246 {
247 vamp_kiss_fft_cpx * Fout_beg=Fout;
248 const int p=*factors++; /* the radix */
249 const int m=*factors++; /* stage's fft length/p */
250 const vamp_kiss_fft_cpx * Fout_end = Fout + p*m;
251
252 #ifdef _OPENMP
253 // use openmp extensions at the
254 // top-level (not recursive)
255 if (fstride==1 && p<=5)
256 {
257 int k;
258
259 // execute the p different work units in different threads
260 # pragma omp parallel for
261 for (k=0;k<p;++k)
262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
263 // all threads have joined by this point
264
265 switch (p) {
266 case 2: kf_bfly2(Fout,fstride,st,m); break;
267 case 3: kf_bfly3(Fout,fstride,st,m); break;
268 case 4: kf_bfly4(Fout,fstride,st,m); break;
269 case 5: kf_bfly5(Fout,fstride,st,m); break;
270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
271 }
272 return;
273 }
274 #endif
275
276 if (m==1) {
277 do{
278 Fout->r = f->r;
279 Fout->i = f->i;
280 f += fstride*in_stride;
281 }while(++Fout != Fout_end );
282 }else{
283 do{
284 // recursive call:
285 // DFT of size m*p performed by doing
286 // p instances of smaller DFTs of size m,
287 // each one takes a decimated version of the input
288 kf_work( Fout , f, fstride*p, in_stride, factors,st);
289 f += fstride*in_stride;
290 }while( (Fout += m) != Fout_end );
291 }
292
293 Fout=Fout_beg;
294
295 // recombine the p smaller DFTs
296 switch (p) {
297 case 2: kf_bfly2(Fout,fstride,st,m); break;
298 case 3: kf_bfly3(Fout,fstride,st,m); break;
299 case 4: kf_bfly4(Fout,fstride,st,m); break;
300 case 5: kf_bfly5(Fout,fstride,st,m); break;
301 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
302 }
303 }
304
305 /* facbuf is populated by p1,m1,p2,m2, ...
306 where
307 p[i] * m[i] = m[i-1]
308 m0 = n */
309 static
310 void kf_factor(int n,int * facbuf)
311 {
312 int p=4;
313 double floor_sqrt;
314 floor_sqrt = floor( sqrt((double)n) );
315
316 /*factor out powers of 4, powers of 2, then any remaining primes */
317 do {
318 while (n % p) {
319 switch (p) {
320 case 4: p = 2; break;
321 case 2: p = 3; break;
322 default: p += 2; break;
323 }
324 if (p > floor_sqrt)
325 p = n; /* no more factors, skip to end */
326 }
327 n /= p;
328 *facbuf++ = p;
329 *facbuf++ = n;
330 } while (n > 1);
331 }
332
333 /*
334 *
335 * User-callable function to allocate all necessary storage space for the fft.
336 *
337 * The return value is a contiguous block of memory, allocated with malloc. As such,
338 * It can be freed with free(), rather than a kiss_fft-specific function.
339 * */
340 vamp_kiss_fft_cfg vamp_kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
341 {
342 vamp_kiss_fft_cfg st=NULL;
343 size_t memneeded = sizeof(struct vamp_kiss_fft_state)
344 + sizeof(vamp_kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
345
346 if ( lenmem==NULL ) {
347 st = ( vamp_kiss_fft_cfg)VAMP_KISS_FFT_MALLOC( memneeded );
348 }else{
349 if (mem != NULL && *lenmem >= memneeded)
350 st = (vamp_kiss_fft_cfg)mem;
351 *lenmem = memneeded;
352 }
353 if (st) {
354 int i;
355 st->nfft=nfft;
356 st->inverse = inverse_fft;
357
358 for (i=0;i<nfft;++i) {
359 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
360 double phase = -2*pi*i / nfft;
361 if (st->inverse)
362 phase *= -1;
363 kf_cexp(st->twiddles+i, phase );
364 }
365
366 kf_factor(nfft,st->factors);
367 }
368 return st;
369 }
370
371 void vamp_kiss_fft_free(void *mem)
372 {
373 free(mem);
374 }
375
376 void vamp_kiss_fft_stride(vamp_kiss_fft_cfg st,const vamp_kiss_fft_cpx *fin,vamp_kiss_fft_cpx *fout,int in_stride)
377 {
378 if (fin == fout) {
379 //NOTE: this is not really an in-place FFT algorithm.
380 //It just performs an out-of-place FFT into a temp buffer
381 vamp_kiss_fft_cpx * tmpbuf = (vamp_kiss_fft_cpx*)VAMP_KISS_FFT_TMP_ALLOC( sizeof(vamp_kiss_fft_cpx)*st->nfft);
382 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
383 memcpy(fout,tmpbuf,sizeof(vamp_kiss_fft_cpx)*st->nfft);
384 VAMP_KISS_FFT_TMP_FREE(tmpbuf);
385 }else{
386 kf_work( fout, fin, 1,in_stride, st->factors,st );
387 }
388 }
389
390 void vamp_kiss_fft(vamp_kiss_fft_cfg cfg,const vamp_kiss_fft_cpx *fin,vamp_kiss_fft_cpx *fout)
391 {
392 vamp_kiss_fft_stride(cfg,fin,fout,1);
393 }
394
395
396 void vamp_kiss_fft_cleanup(void)
397 {
398 // nothing needed any more
399 }
400
401 int vamp_kiss_fft_next_fast_size(int n)
402 {
403 while(1) {
404 int m=n;
405 while ( (m%2) == 0 ) m/=2;
406 while ( (m%3) == 0 ) m/=3;
407 while ( (m%5) == 0 ) m/=5;
408 if (m<=1)
409 break; /* n is completely factorable by twos, threes, and fives */
410 n++;
411 }
412 return n;
413 }