annotate ext/kissfft/kiss_fft.c @ 206:335be766a54d

Fix erroneous header guard
author Chris Cannam
date Fri, 30 Sep 2016 19:04:06 +0100
parents 76ec2365b250
children f11ec82227d5
rev   line source
Chris@184 1 /*
Chris@184 2 Copyright (c) 2003-2010, Mark Borgerding
Chris@184 3
Chris@184 4 All rights reserved.
Chris@184 5
Chris@184 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Chris@184 7
Chris@184 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Chris@184 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Chris@184 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
Chris@184 11
Chris@184 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@184 13 */
Chris@184 14
Chris@184 15
Chris@184 16 #include "_kiss_fft_guts.h"
Chris@184 17 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@184 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
Chris@184 19 */
Chris@184 20
Chris@184 21 static void kf_bfly2(
Chris@184 22 kiss_fft_cpx * Fout,
Chris@184 23 const size_t fstride,
Chris@184 24 const kiss_fft_cfg st,
Chris@184 25 int m
Chris@184 26 )
Chris@184 27 {
Chris@184 28 kiss_fft_cpx * Fout2;
Chris@184 29 kiss_fft_cpx * tw1 = st->twiddles;
Chris@184 30 kiss_fft_cpx t;
Chris@184 31 Fout2 = Fout + m;
Chris@184 32 do{
Chris@184 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
Chris@184 34
Chris@184 35 C_MUL (t, *Fout2 , *tw1);
Chris@184 36 tw1 += fstride;
Chris@184 37 C_SUB( *Fout2 , *Fout , t );
Chris@184 38 C_ADDTO( *Fout , t );
Chris@184 39 ++Fout2;
Chris@184 40 ++Fout;
Chris@184 41 }while (--m);
Chris@184 42 }
Chris@184 43
Chris@184 44 static void kf_bfly4(
Chris@184 45 kiss_fft_cpx * Fout,
Chris@184 46 const size_t fstride,
Chris@184 47 const kiss_fft_cfg st,
Chris@184 48 const size_t m
Chris@184 49 )
Chris@184 50 {
Chris@184 51 kiss_fft_cpx *tw1,*tw2,*tw3;
Chris@184 52 kiss_fft_cpx scratch[6];
Chris@184 53 size_t k=m;
Chris@184 54 const size_t m2=2*m;
Chris@184 55 const size_t m3=3*m;
Chris@184 56
Chris@184 57
Chris@184 58 tw3 = tw2 = tw1 = st->twiddles;
Chris@184 59
Chris@184 60 do {
Chris@184 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
Chris@184 62
Chris@184 63 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@184 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@184 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@184 66
Chris@184 67 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@184 68 C_ADDTO(*Fout, scratch[1]);
Chris@184 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@184 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@184 71 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@184 72 tw1 += fstride;
Chris@184 73 tw2 += fstride*2;
Chris@184 74 tw3 += fstride*3;
Chris@184 75 C_ADDTO( *Fout , scratch[3] );
Chris@184 76
Chris@184 77 if(st->inverse) {
Chris@184 78 Fout[m].r = scratch[5].r - scratch[4].i;
Chris@184 79 Fout[m].i = scratch[5].i + scratch[4].r;
Chris@184 80 Fout[m3].r = scratch[5].r + scratch[4].i;
Chris@184 81 Fout[m3].i = scratch[5].i - scratch[4].r;
Chris@184 82 }else{
Chris@184 83 Fout[m].r = scratch[5].r + scratch[4].i;
Chris@184 84 Fout[m].i = scratch[5].i - scratch[4].r;
Chris@184 85 Fout[m3].r = scratch[5].r - scratch[4].i;
Chris@184 86 Fout[m3].i = scratch[5].i + scratch[4].r;
Chris@184 87 }
Chris@184 88 ++Fout;
Chris@184 89 }while(--k);
Chris@184 90 }
Chris@184 91
Chris@184 92 static void kf_bfly3(
Chris@184 93 kiss_fft_cpx * Fout,
Chris@184 94 const size_t fstride,
Chris@184 95 const kiss_fft_cfg st,
Chris@184 96 size_t m
Chris@184 97 )
Chris@184 98 {
Chris@184 99 size_t k=m;
Chris@184 100 const size_t m2 = 2*m;
Chris@184 101 kiss_fft_cpx *tw1,*tw2;
Chris@184 102 kiss_fft_cpx scratch[5];
Chris@184 103 kiss_fft_cpx epi3;
Chris@184 104 epi3 = st->twiddles[fstride*m];
Chris@184 105
Chris@184 106 tw1=tw2=st->twiddles;
Chris@184 107
Chris@184 108 do{
Chris@184 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
Chris@184 110
Chris@184 111 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@184 112 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@184 113
Chris@184 114 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@184 115 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@184 116 tw1 += fstride;
Chris@184 117 tw2 += fstride*2;
Chris@184 118
Chris@184 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Chris@184 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Chris@184 121
Chris@184 122 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@184 123
Chris@184 124 C_ADDTO(*Fout,scratch[3]);
Chris@184 125
Chris@184 126 Fout[m2].r = Fout[m].r + scratch[0].i;
Chris@184 127 Fout[m2].i = Fout[m].i - scratch[0].r;
Chris@184 128
Chris@184 129 Fout[m].r -= scratch[0].i;
Chris@184 130 Fout[m].i += scratch[0].r;
Chris@184 131
Chris@184 132 ++Fout;
Chris@184 133 }while(--k);
Chris@184 134 }
Chris@184 135
Chris@184 136 static void kf_bfly5(
Chris@184 137 kiss_fft_cpx * Fout,
Chris@184 138 const size_t fstride,
Chris@184 139 const kiss_fft_cfg st,
Chris@184 140 int m
Chris@184 141 )
Chris@184 142 {
Chris@184 143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@184 144 int u;
Chris@184 145 kiss_fft_cpx scratch[13];
Chris@184 146 kiss_fft_cpx * twiddles = st->twiddles;
Chris@184 147 kiss_fft_cpx *tw;
Chris@184 148 kiss_fft_cpx ya,yb;
Chris@184 149 ya = twiddles[fstride*m];
Chris@184 150 yb = twiddles[fstride*2*m];
Chris@184 151
Chris@184 152 Fout0=Fout;
Chris@184 153 Fout1=Fout0+m;
Chris@184 154 Fout2=Fout0+2*m;
Chris@184 155 Fout3=Fout0+3*m;
Chris@184 156 Fout4=Fout0+4*m;
Chris@184 157
Chris@184 158 tw=st->twiddles;
Chris@184 159 for ( u=0; u<m; ++u ) {
Chris@184 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
Chris@184 161 scratch[0] = *Fout0;
Chris@184 162
Chris@184 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@184 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@184 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@184 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@184 167
Chris@184 168 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@184 169 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@184 170 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@184 171 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@184 172
Chris@184 173 Fout0->r += scratch[7].r + scratch[8].r;
Chris@184 174 Fout0->i += scratch[7].i + scratch[8].i;
Chris@184 175
Chris@184 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
Chris@184 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
Chris@184 178
Chris@184 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
Chris@184 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
Chris@184 181
Chris@184 182 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@184 183 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@184 184
Chris@184 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
Chris@184 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
Chris@184 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
Chris@184 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
Chris@184 189
Chris@184 190 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@184 191 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@184 192
Chris@184 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@184 194 }
Chris@184 195 }
Chris@184 196
Chris@184 197 /* perform the butterfly for one stage of a mixed radix FFT */
Chris@184 198 static void kf_bfly_generic(
Chris@184 199 kiss_fft_cpx * Fout,
Chris@184 200 const size_t fstride,
Chris@184 201 const kiss_fft_cfg st,
Chris@184 202 int m,
Chris@184 203 int p
Chris@184 204 )
Chris@184 205 {
Chris@184 206 int u,k,q1,q;
Chris@184 207 kiss_fft_cpx * twiddles = st->twiddles;
Chris@184 208 kiss_fft_cpx t;
Chris@184 209 int Norig = st->nfft;
Chris@184 210
Chris@184 211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
Chris@184 212
Chris@184 213 for ( u=0; u<m; ++u ) {
Chris@184 214 k=u;
Chris@184 215 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@184 216 scratch[q1] = Fout[ k ];
Chris@184 217 C_FIXDIV(scratch[q1],p);
Chris@184 218 k += m;
Chris@184 219 }
Chris@184 220
Chris@184 221 k=u;
Chris@184 222 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@184 223 int twidx=0;
Chris@184 224 Fout[ k ] = scratch[0];
Chris@184 225 for (q=1;q<p;++q ) {
Chris@184 226 twidx += fstride * k;
Chris@184 227 if (twidx>=Norig) twidx-=Norig;
Chris@184 228 C_MUL(t,scratch[q] , twiddles[twidx] );
Chris@184 229 C_ADDTO( Fout[ k ] ,t);
Chris@184 230 }
Chris@184 231 k += m;
Chris@184 232 }
Chris@184 233 }
Chris@184 234 KISS_FFT_TMP_FREE(scratch);
Chris@184 235 }
Chris@184 236
Chris@184 237 static
Chris@184 238 void kf_work(
Chris@184 239 kiss_fft_cpx * Fout,
Chris@184 240 const kiss_fft_cpx * f,
Chris@184 241 const size_t fstride,
Chris@184 242 int in_stride,
Chris@184 243 int * factors,
Chris@184 244 const kiss_fft_cfg st
Chris@184 245 )
Chris@184 246 {
Chris@184 247 kiss_fft_cpx * Fout_beg=Fout;
Chris@184 248 const int p=*factors++; /* the radix */
Chris@184 249 const int m=*factors++; /* stage's fft length/p */
Chris@184 250 const kiss_fft_cpx * Fout_end = Fout + p*m;
Chris@184 251
Chris@184 252 #ifdef _OPENMP
Chris@184 253 // use openmp extensions at the
Chris@184 254 // top-level (not recursive)
Chris@184 255 if (fstride==1 && p<=5)
Chris@184 256 {
Chris@184 257 int k;
Chris@184 258
Chris@184 259 // execute the p different work units in different threads
Chris@184 260 # pragma omp parallel for
Chris@184 261 for (k=0;k<p;++k)
Chris@184 262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
Chris@184 263 // all threads have joined by this point
Chris@184 264
Chris@184 265 switch (p) {
Chris@184 266 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@184 267 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@184 268 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@184 269 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@184 270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@184 271 }
Chris@184 272 return;
Chris@184 273 }
Chris@184 274 #endif
Chris@184 275
Chris@184 276 if (m==1) {
Chris@184 277 do{
Chris@184 278 *Fout = *f;
Chris@184 279 f += fstride*in_stride;
Chris@184 280 }while(++Fout != Fout_end );
Chris@184 281 }else{
Chris@184 282 do{
Chris@184 283 // recursive call:
Chris@184 284 // DFT of size m*p performed by doing
Chris@184 285 // p instances of smaller DFTs of size m,
Chris@184 286 // each one takes a decimated version of the input
Chris@184 287 kf_work( Fout , f, fstride*p, in_stride, factors,st);
Chris@184 288 f += fstride*in_stride;
Chris@184 289 }while( (Fout += m) != Fout_end );
Chris@184 290 }
Chris@184 291
Chris@184 292 Fout=Fout_beg;
Chris@184 293
Chris@184 294 // recombine the p smaller DFTs
Chris@184 295 switch (p) {
Chris@184 296 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@184 297 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@184 298 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@184 299 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@184 300 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@184 301 }
Chris@184 302 }
Chris@184 303
Chris@184 304 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@184 305 where
Chris@184 306 p[i] * m[i] = m[i-1]
Chris@184 307 m0 = n */
Chris@184 308 static
Chris@184 309 void kf_factor(int n,int * facbuf)
Chris@184 310 {
Chris@184 311 int p=4;
Chris@184 312 double floor_sqrt;
Chris@184 313 floor_sqrt = floor( sqrt((double)n) );
Chris@184 314
Chris@184 315 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@184 316 do {
Chris@184 317 while (n % p) {
Chris@184 318 switch (p) {
Chris@184 319 case 4: p = 2; break;
Chris@184 320 case 2: p = 3; break;
Chris@184 321 default: p += 2; break;
Chris@184 322 }
Chris@184 323 if (p > floor_sqrt)
Chris@184 324 p = n; /* no more factors, skip to end */
Chris@184 325 }
Chris@184 326 n /= p;
Chris@184 327 *facbuf++ = p;
Chris@184 328 *facbuf++ = n;
Chris@184 329 } while (n > 1);
Chris@184 330 }
Chris@184 331
Chris@184 332 /*
Chris@184 333 *
Chris@184 334 * User-callable function to allocate all necessary storage space for the fft.
Chris@184 335 *
Chris@184 336 * The return value is a contiguous block of memory, allocated with malloc. As such,
Chris@184 337 * It can be freed with free(), rather than a kiss_fft-specific function.
Chris@184 338 * */
Chris@184 339 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
Chris@184 340 {
Chris@184 341 kiss_fft_cfg st=NULL;
Chris@184 342 size_t memneeded = sizeof(struct kiss_fft_state)
Chris@184 343 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
Chris@184 344
Chris@184 345 if ( lenmem==NULL ) {
Chris@184 346 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
Chris@184 347 }else{
Chris@184 348 if (mem != NULL && *lenmem >= memneeded)
Chris@184 349 st = (kiss_fft_cfg)mem;
Chris@184 350 *lenmem = memneeded;
Chris@184 351 }
Chris@184 352 if (st) {
Chris@184 353 int i;
Chris@184 354 st->nfft=nfft;
Chris@184 355 st->inverse = inverse_fft;
Chris@184 356
Chris@184 357 for (i=0;i<nfft;++i) {
Chris@184 358 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
Chris@184 359 double phase = -2*pi*i / nfft;
Chris@184 360 if (st->inverse)
Chris@184 361 phase *= -1;
Chris@184 362 kf_cexp(st->twiddles+i, phase );
Chris@184 363 }
Chris@184 364
Chris@184 365 kf_factor(nfft,st->factors);
Chris@184 366 }
Chris@184 367 return st;
Chris@184 368 }
Chris@184 369
Chris@184 370
Chris@184 371 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
Chris@184 372 {
Chris@184 373 if (fin == fout) {
Chris@184 374 //NOTE: this is not really an in-place FFT algorithm.
Chris@184 375 //It just performs an out-of-place FFT into a temp buffer
Chris@184 376 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
Chris@184 377 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
Chris@184 378 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
Chris@184 379 KISS_FFT_TMP_FREE(tmpbuf);
Chris@184 380 }else{
Chris@184 381 kf_work( fout, fin, 1,in_stride, st->factors,st );
Chris@184 382 }
Chris@184 383 }
Chris@184 384
Chris@184 385 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@184 386 {
Chris@184 387 kiss_fft_stride(cfg,fin,fout,1);
Chris@184 388 }
Chris@184 389
Chris@184 390
Chris@184 391 void kiss_fft_cleanup(void)
Chris@184 392 {
Chris@184 393 // nothing needed any more
Chris@184 394 }
Chris@184 395
Chris@184 396 int kiss_fft_next_fast_size(int n)
Chris@184 397 {
Chris@184 398 while(1) {
Chris@184 399 int m=n;
Chris@184 400 while ( (m%2) == 0 ) m/=2;
Chris@184 401 while ( (m%3) == 0 ) m/=3;
Chris@184 402 while ( (m%5) == 0 ) m/=5;
Chris@184 403 if (m<=1)
Chris@184 404 break; /* n is completely factorable by twos, threes, and fives */
Chris@184 405 n++;
Chris@184 406 }
Chris@184 407 return n;
Chris@184 408 }