c@289
|
1 /*
|
c@289
|
2 Copyright (c) 2003-2004, Mark Borgerding
|
c@289
|
3
|
c@289
|
4 All rights reserved.
|
c@289
|
5
|
c@289
|
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
c@289
|
7
|
c@289
|
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
c@289
|
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
c@289
|
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
c@289
|
11
|
c@289
|
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
c@289
|
13 */
|
c@289
|
14
|
c@289
|
15
|
c@289
|
16 #include "_kiss_fft_guts.h"
|
c@289
|
17 /* The guts header contains all the multiplication and addition macros that are defined for
|
c@289
|
18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
|
c@289
|
19 */
|
c@289
|
20
|
c@289
|
21 static kiss_fft_cpx *scratchbuf=NULL;
|
c@289
|
22 static size_t nscratchbuf=0;
|
c@289
|
23 static kiss_fft_cpx *tmpbuf=NULL;
|
c@289
|
24 static size_t ntmpbuf=0;
|
c@289
|
25
|
c@289
|
26 #define CHECKBUF(buf,nbuf,n) \
|
c@289
|
27 do { \
|
c@289
|
28 if ( nbuf < (size_t)(n) ) {\
|
c@289
|
29 free(buf); \
|
c@289
|
30 buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
|
c@289
|
31 nbuf = (size_t)(n); \
|
c@289
|
32 } \
|
c@289
|
33 }while(0)
|
c@289
|
34
|
c@289
|
35
|
c@289
|
36 static void kf_bfly2(
|
c@289
|
37 kiss_fft_cpx * Fout,
|
c@289
|
38 const size_t fstride,
|
c@289
|
39 const kiss_fft_cfg st,
|
c@289
|
40 int m
|
c@289
|
41 )
|
c@289
|
42 {
|
c@289
|
43 kiss_fft_cpx * Fout2;
|
c@289
|
44 kiss_fft_cpx * tw1 = st->twiddles;
|
c@289
|
45 kiss_fft_cpx t;
|
c@289
|
46 Fout2 = Fout + m;
|
c@289
|
47 do{
|
c@289
|
48 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
|
c@289
|
49
|
c@289
|
50 C_MUL (t, *Fout2 , *tw1);
|
c@289
|
51 tw1 += fstride;
|
c@289
|
52 C_SUB( *Fout2 , *Fout , t );
|
c@289
|
53 C_ADDTO( *Fout , t );
|
c@289
|
54 ++Fout2;
|
c@289
|
55 ++Fout;
|
c@289
|
56 }while (--m);
|
c@289
|
57 }
|
c@289
|
58
|
c@289
|
59 static void kf_bfly4(
|
c@289
|
60 kiss_fft_cpx * Fout,
|
c@289
|
61 const size_t fstride,
|
c@289
|
62 const kiss_fft_cfg st,
|
c@289
|
63 const size_t m
|
c@289
|
64 )
|
c@289
|
65 {
|
c@289
|
66 kiss_fft_cpx *tw1,*tw2,*tw3;
|
c@289
|
67 kiss_fft_cpx scratch[6];
|
c@289
|
68 size_t k=m;
|
c@289
|
69 const size_t m2=2*m;
|
c@289
|
70 const size_t m3=3*m;
|
c@289
|
71
|
c@289
|
72 tw3 = tw2 = tw1 = st->twiddles;
|
c@289
|
73
|
c@289
|
74 do {
|
c@289
|
75 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
|
c@289
|
76
|
c@289
|
77 C_MUL(scratch[0],Fout[m] , *tw1 );
|
c@289
|
78 C_MUL(scratch[1],Fout[m2] , *tw2 );
|
c@289
|
79 C_MUL(scratch[2],Fout[m3] , *tw3 );
|
c@289
|
80
|
c@289
|
81 C_SUB( scratch[5] , *Fout, scratch[1] );
|
c@289
|
82 C_ADDTO(*Fout, scratch[1]);
|
c@289
|
83 C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
c@289
|
84 C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
c@289
|
85 C_SUB( Fout[m2], *Fout, scratch[3] );
|
c@289
|
86 tw1 += fstride;
|
c@289
|
87 tw2 += fstride*2;
|
c@289
|
88 tw3 += fstride*3;
|
c@289
|
89 C_ADDTO( *Fout , scratch[3] );
|
c@289
|
90
|
c@289
|
91 if(st->inverse) {
|
c@289
|
92 Fout[m].r = scratch[5].r - scratch[4].i;
|
c@289
|
93 Fout[m].i = scratch[5].i + scratch[4].r;
|
c@289
|
94 Fout[m3].r = scratch[5].r + scratch[4].i;
|
c@289
|
95 Fout[m3].i = scratch[5].i - scratch[4].r;
|
c@289
|
96 }else{
|
c@289
|
97 Fout[m].r = scratch[5].r + scratch[4].i;
|
c@289
|
98 Fout[m].i = scratch[5].i - scratch[4].r;
|
c@289
|
99 Fout[m3].r = scratch[5].r - scratch[4].i;
|
c@289
|
100 Fout[m3].i = scratch[5].i + scratch[4].r;
|
c@289
|
101 }
|
c@289
|
102 ++Fout;
|
c@289
|
103 }while(--k);
|
c@289
|
104 }
|
c@289
|
105
|
c@289
|
106 static void kf_bfly3(
|
c@289
|
107 kiss_fft_cpx * Fout,
|
c@289
|
108 const size_t fstride,
|
c@289
|
109 const kiss_fft_cfg st,
|
c@289
|
110 size_t m
|
c@289
|
111 )
|
c@289
|
112 {
|
c@289
|
113 size_t k=m;
|
c@289
|
114 const size_t m2 = 2*m;
|
c@289
|
115 kiss_fft_cpx *tw1,*tw2;
|
c@289
|
116 kiss_fft_cpx scratch[5];
|
c@289
|
117 kiss_fft_cpx epi3;
|
c@289
|
118 epi3 = st->twiddles[fstride*m];
|
c@289
|
119
|
c@289
|
120 tw1=tw2=st->twiddles;
|
c@289
|
121
|
c@289
|
122 do{
|
c@289
|
123 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
|
c@289
|
124
|
c@289
|
125 C_MUL(scratch[1],Fout[m] , *tw1);
|
c@289
|
126 C_MUL(scratch[2],Fout[m2] , *tw2);
|
c@289
|
127
|
c@289
|
128 C_ADD(scratch[3],scratch[1],scratch[2]);
|
c@289
|
129 C_SUB(scratch[0],scratch[1],scratch[2]);
|
c@289
|
130 tw1 += fstride;
|
c@289
|
131 tw2 += fstride*2;
|
c@289
|
132
|
c@289
|
133 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
|
c@289
|
134 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
|
c@289
|
135
|
c@289
|
136 C_MULBYSCALAR( scratch[0] , epi3.i );
|
c@289
|
137
|
c@289
|
138 C_ADDTO(*Fout,scratch[3]);
|
c@289
|
139
|
c@289
|
140 Fout[m2].r = Fout[m].r + scratch[0].i;
|
c@289
|
141 Fout[m2].i = Fout[m].i - scratch[0].r;
|
c@289
|
142
|
c@289
|
143 Fout[m].r -= scratch[0].i;
|
c@289
|
144 Fout[m].i += scratch[0].r;
|
c@289
|
145
|
c@289
|
146 ++Fout;
|
c@289
|
147 }while(--k);
|
c@289
|
148 }
|
c@289
|
149
|
c@289
|
150 static void kf_bfly5(
|
c@289
|
151 kiss_fft_cpx * Fout,
|
c@289
|
152 const size_t fstride,
|
c@289
|
153 const kiss_fft_cfg st,
|
c@289
|
154 int m
|
c@289
|
155 )
|
c@289
|
156 {
|
c@289
|
157 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
c@289
|
158 int u;
|
c@289
|
159 kiss_fft_cpx scratch[13];
|
c@289
|
160 kiss_fft_cpx * twiddles = st->twiddles;
|
c@289
|
161 kiss_fft_cpx *tw;
|
c@289
|
162 kiss_fft_cpx ya,yb;
|
c@289
|
163 ya = twiddles[fstride*m];
|
c@289
|
164 yb = twiddles[fstride*2*m];
|
c@289
|
165
|
c@289
|
166 Fout0=Fout;
|
c@289
|
167 Fout1=Fout0+m;
|
c@289
|
168 Fout2=Fout0+2*m;
|
c@289
|
169 Fout3=Fout0+3*m;
|
c@289
|
170 Fout4=Fout0+4*m;
|
c@289
|
171
|
c@289
|
172 tw=st->twiddles;
|
c@289
|
173 for ( u=0; u<m; ++u ) {
|
c@289
|
174 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
|
c@289
|
175 scratch[0] = *Fout0;
|
c@289
|
176
|
c@289
|
177 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
|
c@289
|
178 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
|
c@289
|
179 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
|
c@289
|
180 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
|
c@289
|
181
|
c@289
|
182 C_ADD( scratch[7],scratch[1],scratch[4]);
|
c@289
|
183 C_SUB( scratch[10],scratch[1],scratch[4]);
|
c@289
|
184 C_ADD( scratch[8],scratch[2],scratch[3]);
|
c@289
|
185 C_SUB( scratch[9],scratch[2],scratch[3]);
|
c@289
|
186
|
c@289
|
187 Fout0->r += scratch[7].r + scratch[8].r;
|
c@289
|
188 Fout0->i += scratch[7].i + scratch[8].i;
|
c@289
|
189
|
c@289
|
190 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
|
c@289
|
191 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
|
c@289
|
192
|
c@289
|
193 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
|
c@289
|
194 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
|
c@289
|
195
|
c@289
|
196 C_SUB(*Fout1,scratch[5],scratch[6]);
|
c@289
|
197 C_ADD(*Fout4,scratch[5],scratch[6]);
|
c@289
|
198
|
c@289
|
199 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
|
c@289
|
200 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
|
c@289
|
201 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
|
c@289
|
202 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
|
c@289
|
203
|
c@289
|
204 C_ADD(*Fout2,scratch[11],scratch[12]);
|
c@289
|
205 C_SUB(*Fout3,scratch[11],scratch[12]);
|
c@289
|
206
|
c@289
|
207 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
|
c@289
|
208 }
|
c@289
|
209 }
|
c@289
|
210
|
c@289
|
211 /* perform the butterfly for one stage of a mixed radix FFT */
|
c@289
|
212 static void kf_bfly_generic(
|
c@289
|
213 kiss_fft_cpx * Fout,
|
c@289
|
214 const size_t fstride,
|
c@289
|
215 const kiss_fft_cfg st,
|
c@289
|
216 int m,
|
c@289
|
217 int p
|
c@289
|
218 )
|
c@289
|
219 {
|
c@289
|
220 int u,k,q1,q;
|
c@289
|
221 kiss_fft_cpx * twiddles = st->twiddles;
|
c@289
|
222 kiss_fft_cpx t;
|
c@289
|
223 int Norig = st->nfft;
|
c@289
|
224
|
c@289
|
225 CHECKBUF(scratchbuf,nscratchbuf,p);
|
c@289
|
226
|
c@289
|
227 for ( u=0; u<m; ++u ) {
|
c@289
|
228 k=u;
|
c@289
|
229 for ( q1=0 ; q1<p ; ++q1 ) {
|
c@289
|
230 scratchbuf[q1] = Fout[ k ];
|
c@289
|
231 C_FIXDIV(scratchbuf[q1],p);
|
c@289
|
232 k += m;
|
c@289
|
233 }
|
c@289
|
234
|
c@289
|
235 k=u;
|
c@289
|
236 for ( q1=0 ; q1<p ; ++q1 ) {
|
c@289
|
237 int twidx=0;
|
c@289
|
238 Fout[ k ] = scratchbuf[0];
|
c@289
|
239 for (q=1;q<p;++q ) {
|
c@289
|
240 twidx += fstride * k;
|
c@289
|
241 if (twidx>=Norig) twidx-=Norig;
|
c@289
|
242 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
|
c@289
|
243 C_ADDTO( Fout[ k ] ,t);
|
c@289
|
244 }
|
c@289
|
245 k += m;
|
c@289
|
246 }
|
c@289
|
247 }
|
c@289
|
248 }
|
c@289
|
249
|
c@289
|
250 static
|
c@289
|
251 void kf_work(
|
c@289
|
252 kiss_fft_cpx * Fout,
|
c@289
|
253 const kiss_fft_cpx * f,
|
c@289
|
254 const size_t fstride,
|
c@289
|
255 int in_stride,
|
c@289
|
256 int * factors,
|
c@289
|
257 const kiss_fft_cfg st
|
c@289
|
258 )
|
c@289
|
259 {
|
c@289
|
260 kiss_fft_cpx * Fout_beg=Fout;
|
c@289
|
261 const int p=*factors++; /* the radix */
|
c@289
|
262 const int m=*factors++; /* stage's fft length/p */
|
c@289
|
263 const kiss_fft_cpx * Fout_end = Fout + p*m;
|
c@289
|
264
|
c@289
|
265 if (m==1) {
|
c@289
|
266 do{
|
c@289
|
267 *Fout = *f;
|
c@289
|
268 f += fstride*in_stride;
|
c@289
|
269 }while(++Fout != Fout_end );
|
c@289
|
270 }else{
|
c@289
|
271 do{
|
c@289
|
272 kf_work( Fout , f, fstride*p, in_stride, factors,st);
|
c@289
|
273 f += fstride*in_stride;
|
c@289
|
274 }while( (Fout += m) != Fout_end );
|
c@289
|
275 }
|
c@289
|
276
|
c@289
|
277 Fout=Fout_beg;
|
c@289
|
278
|
c@289
|
279 switch (p) {
|
c@289
|
280 case 2: kf_bfly2(Fout,fstride,st,m); break;
|
c@289
|
281 case 3: kf_bfly3(Fout,fstride,st,m); break;
|
c@289
|
282 case 4: kf_bfly4(Fout,fstride,st,m); break;
|
c@289
|
283 case 5: kf_bfly5(Fout,fstride,st,m); break;
|
c@289
|
284 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
|
c@289
|
285 }
|
c@289
|
286 }
|
c@289
|
287
|
c@289
|
288 /* facbuf is populated by p1,m1,p2,m2, ...
|
c@289
|
289 where
|
c@289
|
290 p[i] * m[i] = m[i-1]
|
c@289
|
291 m0 = n */
|
c@289
|
292 static
|
c@289
|
293 void kf_factor(int n,int * facbuf)
|
c@289
|
294 {
|
c@289
|
295 int p=4;
|
c@289
|
296 double floor_sqrt;
|
c@289
|
297 floor_sqrt = floor( sqrt((double)n) );
|
c@289
|
298
|
c@289
|
299 /*factor out powers of 4, powers of 2, then any remaining primes */
|
c@289
|
300 do {
|
c@289
|
301 while (n % p) {
|
c@289
|
302 switch (p) {
|
c@289
|
303 case 4: p = 2; break;
|
c@289
|
304 case 2: p = 3; break;
|
c@289
|
305 default: p += 2; break;
|
c@289
|
306 }
|
c@289
|
307 if (p > floor_sqrt)
|
c@289
|
308 p = n; /* no more factors, skip to end */
|
c@289
|
309 }
|
c@289
|
310 n /= p;
|
c@289
|
311 *facbuf++ = p;
|
c@289
|
312 *facbuf++ = n;
|
c@289
|
313 } while (n > 1);
|
c@289
|
314 }
|
c@289
|
315
|
c@289
|
316 /*
|
c@289
|
317 *
|
c@289
|
318 * User-callable function to allocate all necessary storage space for the fft.
|
c@289
|
319 *
|
c@289
|
320 * The return value is a contiguous block of memory, allocated with malloc. As such,
|
c@289
|
321 * It can be freed with free(), rather than a kiss_fft-specific function.
|
c@289
|
322 * */
|
c@289
|
323 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
|
c@289
|
324 {
|
c@289
|
325 kiss_fft_cfg st=NULL;
|
c@289
|
326 size_t memneeded = sizeof(struct kiss_fft_state)
|
c@289
|
327 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
|
c@289
|
328
|
c@289
|
329 if ( lenmem==NULL ) {
|
c@289
|
330 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
|
c@289
|
331 }else{
|
c@289
|
332 if (mem != NULL && *lenmem >= memneeded)
|
c@289
|
333 st = (kiss_fft_cfg)mem;
|
c@289
|
334 *lenmem = memneeded;
|
c@289
|
335 }
|
c@289
|
336 if (st) {
|
c@289
|
337 int i;
|
c@289
|
338 st->nfft=nfft;
|
c@289
|
339 st->inverse = inverse_fft;
|
c@289
|
340
|
c@289
|
341 for (i=0;i<nfft;++i) {
|
c@289
|
342 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
|
c@289
|
343 double phase = -2*pi*i / nfft;
|
c@289
|
344 if (st->inverse)
|
c@289
|
345 phase *= -1;
|
c@289
|
346 kf_cexp(st->twiddles+i, phase );
|
c@289
|
347 }
|
c@289
|
348
|
c@289
|
349 kf_factor(nfft,st->factors);
|
c@289
|
350 }
|
c@289
|
351 return st;
|
c@289
|
352 }
|
c@289
|
353
|
c@289
|
354
|
c@289
|
355
|
c@289
|
356
|
c@289
|
357 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
|
c@289
|
358 {
|
c@289
|
359 if (fin == fout) {
|
c@289
|
360 CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
|
c@289
|
361 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
|
c@289
|
362 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
|
c@289
|
363 }else{
|
c@289
|
364 kf_work( fout, fin, 1,in_stride, st->factors,st );
|
c@289
|
365 }
|
c@289
|
366 }
|
c@289
|
367
|
c@289
|
368 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
c@289
|
369 {
|
c@289
|
370 kiss_fft_stride(cfg,fin,fout,1);
|
c@289
|
371 }
|
c@289
|
372
|
c@289
|
373
|
c@289
|
374 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
|
c@289
|
375 buffers from CHECKBUF
|
c@289
|
376 */
|
c@289
|
377 void kiss_fft_cleanup(void)
|
c@289
|
378 {
|
c@289
|
379 free(scratchbuf);
|
c@289
|
380 scratchbuf = NULL;
|
c@289
|
381 nscratchbuf=0;
|
c@289
|
382 free(tmpbuf);
|
c@289
|
383 tmpbuf=NULL;
|
c@289
|
384 ntmpbuf=0;
|
c@289
|
385 }
|
c@289
|
386
|
c@289
|
387 int kiss_fft_next_fast_size(int n)
|
c@289
|
388 {
|
c@289
|
389 while(1) {
|
c@289
|
390 int m=n;
|
c@289
|
391 while ( (m%2) == 0 ) m/=2;
|
c@289
|
392 while ( (m%3) == 0 ) m/=3;
|
c@289
|
393 while ( (m%5) == 0 ) m/=5;
|
c@289
|
394 if (m<=1)
|
c@289
|
395 break; /* n is completely factorable by twos, threes, and fives */
|
c@289
|
396 n++;
|
c@289
|
397 }
|
c@289
|
398 return n;
|
c@289
|
399 }
|