annotate dsp/transforms/kissfft/kiss_fft.c @ 289:befe5aa6b450

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 13 May 2009 09:19:12 +0000
parents
children
rev   line source
c@289 1 /*
c@289 2 Copyright (c) 2003-2004, Mark Borgerding
c@289 3
c@289 4 All rights reserved.
c@289 5
c@289 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
c@289 7
c@289 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
c@289 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
c@289 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
c@289 11
c@289 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
c@289 13 */
c@289 14
c@289 15
c@289 16 #include "_kiss_fft_guts.h"
c@289 17 /* The guts header contains all the multiplication and addition macros that are defined for
c@289 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
c@289 19 */
c@289 20
c@289 21 static kiss_fft_cpx *scratchbuf=NULL;
c@289 22 static size_t nscratchbuf=0;
c@289 23 static kiss_fft_cpx *tmpbuf=NULL;
c@289 24 static size_t ntmpbuf=0;
c@289 25
c@289 26 #define CHECKBUF(buf,nbuf,n) \
c@289 27 do { \
c@289 28 if ( nbuf < (size_t)(n) ) {\
c@289 29 free(buf); \
c@289 30 buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
c@289 31 nbuf = (size_t)(n); \
c@289 32 } \
c@289 33 }while(0)
c@289 34
c@289 35
c@289 36 static void kf_bfly2(
c@289 37 kiss_fft_cpx * Fout,
c@289 38 const size_t fstride,
c@289 39 const kiss_fft_cfg st,
c@289 40 int m
c@289 41 )
c@289 42 {
c@289 43 kiss_fft_cpx * Fout2;
c@289 44 kiss_fft_cpx * tw1 = st->twiddles;
c@289 45 kiss_fft_cpx t;
c@289 46 Fout2 = Fout + m;
c@289 47 do{
c@289 48 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
c@289 49
c@289 50 C_MUL (t, *Fout2 , *tw1);
c@289 51 tw1 += fstride;
c@289 52 C_SUB( *Fout2 , *Fout , t );
c@289 53 C_ADDTO( *Fout , t );
c@289 54 ++Fout2;
c@289 55 ++Fout;
c@289 56 }while (--m);
c@289 57 }
c@289 58
c@289 59 static void kf_bfly4(
c@289 60 kiss_fft_cpx * Fout,
c@289 61 const size_t fstride,
c@289 62 const kiss_fft_cfg st,
c@289 63 const size_t m
c@289 64 )
c@289 65 {
c@289 66 kiss_fft_cpx *tw1,*tw2,*tw3;
c@289 67 kiss_fft_cpx scratch[6];
c@289 68 size_t k=m;
c@289 69 const size_t m2=2*m;
c@289 70 const size_t m3=3*m;
c@289 71
c@289 72 tw3 = tw2 = tw1 = st->twiddles;
c@289 73
c@289 74 do {
c@289 75 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
c@289 76
c@289 77 C_MUL(scratch[0],Fout[m] , *tw1 );
c@289 78 C_MUL(scratch[1],Fout[m2] , *tw2 );
c@289 79 C_MUL(scratch[2],Fout[m3] , *tw3 );
c@289 80
c@289 81 C_SUB( scratch[5] , *Fout, scratch[1] );
c@289 82 C_ADDTO(*Fout, scratch[1]);
c@289 83 C_ADD( scratch[3] , scratch[0] , scratch[2] );
c@289 84 C_SUB( scratch[4] , scratch[0] , scratch[2] );
c@289 85 C_SUB( Fout[m2], *Fout, scratch[3] );
c@289 86 tw1 += fstride;
c@289 87 tw2 += fstride*2;
c@289 88 tw3 += fstride*3;
c@289 89 C_ADDTO( *Fout , scratch[3] );
c@289 90
c@289 91 if(st->inverse) {
c@289 92 Fout[m].r = scratch[5].r - scratch[4].i;
c@289 93 Fout[m].i = scratch[5].i + scratch[4].r;
c@289 94 Fout[m3].r = scratch[5].r + scratch[4].i;
c@289 95 Fout[m3].i = scratch[5].i - scratch[4].r;
c@289 96 }else{
c@289 97 Fout[m].r = scratch[5].r + scratch[4].i;
c@289 98 Fout[m].i = scratch[5].i - scratch[4].r;
c@289 99 Fout[m3].r = scratch[5].r - scratch[4].i;
c@289 100 Fout[m3].i = scratch[5].i + scratch[4].r;
c@289 101 }
c@289 102 ++Fout;
c@289 103 }while(--k);
c@289 104 }
c@289 105
c@289 106 static void kf_bfly3(
c@289 107 kiss_fft_cpx * Fout,
c@289 108 const size_t fstride,
c@289 109 const kiss_fft_cfg st,
c@289 110 size_t m
c@289 111 )
c@289 112 {
c@289 113 size_t k=m;
c@289 114 const size_t m2 = 2*m;
c@289 115 kiss_fft_cpx *tw1,*tw2;
c@289 116 kiss_fft_cpx scratch[5];
c@289 117 kiss_fft_cpx epi3;
c@289 118 epi3 = st->twiddles[fstride*m];
c@289 119
c@289 120 tw1=tw2=st->twiddles;
c@289 121
c@289 122 do{
c@289 123 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
c@289 124
c@289 125 C_MUL(scratch[1],Fout[m] , *tw1);
c@289 126 C_MUL(scratch[2],Fout[m2] , *tw2);
c@289 127
c@289 128 C_ADD(scratch[3],scratch[1],scratch[2]);
c@289 129 C_SUB(scratch[0],scratch[1],scratch[2]);
c@289 130 tw1 += fstride;
c@289 131 tw2 += fstride*2;
c@289 132
c@289 133 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
c@289 134 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
c@289 135
c@289 136 C_MULBYSCALAR( scratch[0] , epi3.i );
c@289 137
c@289 138 C_ADDTO(*Fout,scratch[3]);
c@289 139
c@289 140 Fout[m2].r = Fout[m].r + scratch[0].i;
c@289 141 Fout[m2].i = Fout[m].i - scratch[0].r;
c@289 142
c@289 143 Fout[m].r -= scratch[0].i;
c@289 144 Fout[m].i += scratch[0].r;
c@289 145
c@289 146 ++Fout;
c@289 147 }while(--k);
c@289 148 }
c@289 149
c@289 150 static void kf_bfly5(
c@289 151 kiss_fft_cpx * Fout,
c@289 152 const size_t fstride,
c@289 153 const kiss_fft_cfg st,
c@289 154 int m
c@289 155 )
c@289 156 {
c@289 157 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
c@289 158 int u;
c@289 159 kiss_fft_cpx scratch[13];
c@289 160 kiss_fft_cpx * twiddles = st->twiddles;
c@289 161 kiss_fft_cpx *tw;
c@289 162 kiss_fft_cpx ya,yb;
c@289 163 ya = twiddles[fstride*m];
c@289 164 yb = twiddles[fstride*2*m];
c@289 165
c@289 166 Fout0=Fout;
c@289 167 Fout1=Fout0+m;
c@289 168 Fout2=Fout0+2*m;
c@289 169 Fout3=Fout0+3*m;
c@289 170 Fout4=Fout0+4*m;
c@289 171
c@289 172 tw=st->twiddles;
c@289 173 for ( u=0; u<m; ++u ) {
c@289 174 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
c@289 175 scratch[0] = *Fout0;
c@289 176
c@289 177 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
c@289 178 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
c@289 179 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
c@289 180 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
c@289 181
c@289 182 C_ADD( scratch[7],scratch[1],scratch[4]);
c@289 183 C_SUB( scratch[10],scratch[1],scratch[4]);
c@289 184 C_ADD( scratch[8],scratch[2],scratch[3]);
c@289 185 C_SUB( scratch[9],scratch[2],scratch[3]);
c@289 186
c@289 187 Fout0->r += scratch[7].r + scratch[8].r;
c@289 188 Fout0->i += scratch[7].i + scratch[8].i;
c@289 189
c@289 190 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
c@289 191 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
c@289 192
c@289 193 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
c@289 194 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
c@289 195
c@289 196 C_SUB(*Fout1,scratch[5],scratch[6]);
c@289 197 C_ADD(*Fout4,scratch[5],scratch[6]);
c@289 198
c@289 199 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
c@289 200 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
c@289 201 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
c@289 202 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
c@289 203
c@289 204 C_ADD(*Fout2,scratch[11],scratch[12]);
c@289 205 C_SUB(*Fout3,scratch[11],scratch[12]);
c@289 206
c@289 207 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
c@289 208 }
c@289 209 }
c@289 210
c@289 211 /* perform the butterfly for one stage of a mixed radix FFT */
c@289 212 static void kf_bfly_generic(
c@289 213 kiss_fft_cpx * Fout,
c@289 214 const size_t fstride,
c@289 215 const kiss_fft_cfg st,
c@289 216 int m,
c@289 217 int p
c@289 218 )
c@289 219 {
c@289 220 int u,k,q1,q;
c@289 221 kiss_fft_cpx * twiddles = st->twiddles;
c@289 222 kiss_fft_cpx t;
c@289 223 int Norig = st->nfft;
c@289 224
c@289 225 CHECKBUF(scratchbuf,nscratchbuf,p);
c@289 226
c@289 227 for ( u=0; u<m; ++u ) {
c@289 228 k=u;
c@289 229 for ( q1=0 ; q1<p ; ++q1 ) {
c@289 230 scratchbuf[q1] = Fout[ k ];
c@289 231 C_FIXDIV(scratchbuf[q1],p);
c@289 232 k += m;
c@289 233 }
c@289 234
c@289 235 k=u;
c@289 236 for ( q1=0 ; q1<p ; ++q1 ) {
c@289 237 int twidx=0;
c@289 238 Fout[ k ] = scratchbuf[0];
c@289 239 for (q=1;q<p;++q ) {
c@289 240 twidx += fstride * k;
c@289 241 if (twidx>=Norig) twidx-=Norig;
c@289 242 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
c@289 243 C_ADDTO( Fout[ k ] ,t);
c@289 244 }
c@289 245 k += m;
c@289 246 }
c@289 247 }
c@289 248 }
c@289 249
c@289 250 static
c@289 251 void kf_work(
c@289 252 kiss_fft_cpx * Fout,
c@289 253 const kiss_fft_cpx * f,
c@289 254 const size_t fstride,
c@289 255 int in_stride,
c@289 256 int * factors,
c@289 257 const kiss_fft_cfg st
c@289 258 )
c@289 259 {
c@289 260 kiss_fft_cpx * Fout_beg=Fout;
c@289 261 const int p=*factors++; /* the radix */
c@289 262 const int m=*factors++; /* stage's fft length/p */
c@289 263 const kiss_fft_cpx * Fout_end = Fout + p*m;
c@289 264
c@289 265 if (m==1) {
c@289 266 do{
c@289 267 *Fout = *f;
c@289 268 f += fstride*in_stride;
c@289 269 }while(++Fout != Fout_end );
c@289 270 }else{
c@289 271 do{
c@289 272 kf_work( Fout , f, fstride*p, in_stride, factors,st);
c@289 273 f += fstride*in_stride;
c@289 274 }while( (Fout += m) != Fout_end );
c@289 275 }
c@289 276
c@289 277 Fout=Fout_beg;
c@289 278
c@289 279 switch (p) {
c@289 280 case 2: kf_bfly2(Fout,fstride,st,m); break;
c@289 281 case 3: kf_bfly3(Fout,fstride,st,m); break;
c@289 282 case 4: kf_bfly4(Fout,fstride,st,m); break;
c@289 283 case 5: kf_bfly5(Fout,fstride,st,m); break;
c@289 284 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
c@289 285 }
c@289 286 }
c@289 287
c@289 288 /* facbuf is populated by p1,m1,p2,m2, ...
c@289 289 where
c@289 290 p[i] * m[i] = m[i-1]
c@289 291 m0 = n */
c@289 292 static
c@289 293 void kf_factor(int n,int * facbuf)
c@289 294 {
c@289 295 int p=4;
c@289 296 double floor_sqrt;
c@289 297 floor_sqrt = floor( sqrt((double)n) );
c@289 298
c@289 299 /*factor out powers of 4, powers of 2, then any remaining primes */
c@289 300 do {
c@289 301 while (n % p) {
c@289 302 switch (p) {
c@289 303 case 4: p = 2; break;
c@289 304 case 2: p = 3; break;
c@289 305 default: p += 2; break;
c@289 306 }
c@289 307 if (p > floor_sqrt)
c@289 308 p = n; /* no more factors, skip to end */
c@289 309 }
c@289 310 n /= p;
c@289 311 *facbuf++ = p;
c@289 312 *facbuf++ = n;
c@289 313 } while (n > 1);
c@289 314 }
c@289 315
c@289 316 /*
c@289 317 *
c@289 318 * User-callable function to allocate all necessary storage space for the fft.
c@289 319 *
c@289 320 * The return value is a contiguous block of memory, allocated with malloc. As such,
c@289 321 * It can be freed with free(), rather than a kiss_fft-specific function.
c@289 322 * */
c@289 323 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
c@289 324 {
c@289 325 kiss_fft_cfg st=NULL;
c@289 326 size_t memneeded = sizeof(struct kiss_fft_state)
c@289 327 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
c@289 328
c@289 329 if ( lenmem==NULL ) {
c@289 330 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
c@289 331 }else{
c@289 332 if (mem != NULL && *lenmem >= memneeded)
c@289 333 st = (kiss_fft_cfg)mem;
c@289 334 *lenmem = memneeded;
c@289 335 }
c@289 336 if (st) {
c@289 337 int i;
c@289 338 st->nfft=nfft;
c@289 339 st->inverse = inverse_fft;
c@289 340
c@289 341 for (i=0;i<nfft;++i) {
c@289 342 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
c@289 343 double phase = -2*pi*i / nfft;
c@289 344 if (st->inverse)
c@289 345 phase *= -1;
c@289 346 kf_cexp(st->twiddles+i, phase );
c@289 347 }
c@289 348
c@289 349 kf_factor(nfft,st->factors);
c@289 350 }
c@289 351 return st;
c@289 352 }
c@289 353
c@289 354
c@289 355
c@289 356
c@289 357 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
c@289 358 {
c@289 359 if (fin == fout) {
c@289 360 CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
c@289 361 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
c@289 362 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
c@289 363 }else{
c@289 364 kf_work( fout, fin, 1,in_stride, st->factors,st );
c@289 365 }
c@289 366 }
c@289 367
c@289 368 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
c@289 369 {
c@289 370 kiss_fft_stride(cfg,fin,fout,1);
c@289 371 }
c@289 372
c@289 373
c@289 374 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
c@289 375 buffers from CHECKBUF
c@289 376 */
c@289 377 void kiss_fft_cleanup(void)
c@289 378 {
c@289 379 free(scratchbuf);
c@289 380 scratchbuf = NULL;
c@289 381 nscratchbuf=0;
c@289 382 free(tmpbuf);
c@289 383 tmpbuf=NULL;
c@289 384 ntmpbuf=0;
c@289 385 }
c@289 386
c@289 387 int kiss_fft_next_fast_size(int n)
c@289 388 {
c@289 389 while(1) {
c@289 390 int m=n;
c@289 391 while ( (m%2) == 0 ) m/=2;
c@289 392 while ( (m%3) == 0 ) m/=3;
c@289 393 while ( (m%5) == 0 ) m/=5;
c@289 394 if (m<=1)
c@289 395 break; /* n is completely factorable by twos, threes, and fives */
c@289 396 n++;
c@289 397 }
c@289 398 return n;
c@289 399 }