annotate ext/kissfft/kiss_fft.c @ 207:f11ec82227d5

Change that seemed to be needed for emscripten double-precision build to work
author Chris Cannam
date Tue, 14 Mar 2017 13:40:50 +0000
parents 76ec2365b250
children
rev   line source
Chris@184 1 /*
Chris@184 2 Copyright (c) 2003-2010, Mark Borgerding
Chris@184 3
Chris@184 4 All rights reserved.
Chris@184 5
Chris@184 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Chris@184 7
Chris@184 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Chris@184 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Chris@184 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
Chris@184 11
Chris@184 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@184 13 */
Chris@184 14
Chris@184 15
Chris@184 16 #include "_kiss_fft_guts.h"
Chris@184 17 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@184 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
Chris@184 19 */
Chris@184 20
Chris@184 21 static void kf_bfly2(
Chris@184 22 kiss_fft_cpx * Fout,
Chris@184 23 const size_t fstride,
Chris@184 24 const kiss_fft_cfg st,
Chris@184 25 int m
Chris@184 26 )
Chris@184 27 {
Chris@184 28 kiss_fft_cpx * Fout2;
Chris@184 29 kiss_fft_cpx * tw1 = st->twiddles;
Chris@184 30 kiss_fft_cpx t;
Chris@184 31 Fout2 = Fout + m;
Chris@184 32 do{
Chris@184 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
Chris@184 34
Chris@184 35 C_MUL (t, *Fout2 , *tw1);
Chris@184 36 tw1 += fstride;
Chris@184 37 C_SUB( *Fout2 , *Fout , t );
Chris@184 38 C_ADDTO( *Fout , t );
Chris@184 39 ++Fout2;
Chris@184 40 ++Fout;
Chris@184 41 }while (--m);
Chris@184 42 }
Chris@184 43
Chris@184 44 static void kf_bfly4(
Chris@184 45 kiss_fft_cpx * Fout,
Chris@184 46 const size_t fstride,
Chris@184 47 const kiss_fft_cfg st,
Chris@184 48 const size_t m
Chris@184 49 )
Chris@184 50 {
Chris@184 51 kiss_fft_cpx *tw1,*tw2,*tw3;
Chris@184 52 kiss_fft_cpx scratch[6];
Chris@184 53 size_t k=m;
Chris@184 54 const size_t m2=2*m;
Chris@184 55 const size_t m3=3*m;
Chris@184 56
Chris@184 57
Chris@184 58 tw3 = tw2 = tw1 = st->twiddles;
Chris@184 59
Chris@184 60 do {
Chris@184 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
Chris@184 62
Chris@184 63 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@184 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@184 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@184 66
Chris@184 67 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@184 68 C_ADDTO(*Fout, scratch[1]);
Chris@184 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@184 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@184 71 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@184 72 tw1 += fstride;
Chris@184 73 tw2 += fstride*2;
Chris@184 74 tw3 += fstride*3;
Chris@184 75 C_ADDTO( *Fout , scratch[3] );
Chris@184 76
Chris@184 77 if(st->inverse) {
Chris@184 78 Fout[m].r = scratch[5].r - scratch[4].i;
Chris@184 79 Fout[m].i = scratch[5].i + scratch[4].r;
Chris@184 80 Fout[m3].r = scratch[5].r + scratch[4].i;
Chris@184 81 Fout[m3].i = scratch[5].i - scratch[4].r;
Chris@184 82 }else{
Chris@184 83 Fout[m].r = scratch[5].r + scratch[4].i;
Chris@184 84 Fout[m].i = scratch[5].i - scratch[4].r;
Chris@184 85 Fout[m3].r = scratch[5].r - scratch[4].i;
Chris@184 86 Fout[m3].i = scratch[5].i + scratch[4].r;
Chris@184 87 }
Chris@184 88 ++Fout;
Chris@184 89 }while(--k);
Chris@184 90 }
Chris@184 91
Chris@184 92 static void kf_bfly3(
Chris@184 93 kiss_fft_cpx * Fout,
Chris@184 94 const size_t fstride,
Chris@184 95 const kiss_fft_cfg st,
Chris@184 96 size_t m
Chris@184 97 )
Chris@184 98 {
Chris@184 99 size_t k=m;
Chris@184 100 const size_t m2 = 2*m;
Chris@184 101 kiss_fft_cpx *tw1,*tw2;
Chris@184 102 kiss_fft_cpx scratch[5];
Chris@184 103 kiss_fft_cpx epi3;
Chris@184 104 epi3 = st->twiddles[fstride*m];
Chris@184 105
Chris@184 106 tw1=tw2=st->twiddles;
Chris@184 107
Chris@184 108 do{
Chris@184 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
Chris@184 110
Chris@184 111 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@184 112 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@184 113
Chris@184 114 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@184 115 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@184 116 tw1 += fstride;
Chris@184 117 tw2 += fstride*2;
Chris@184 118
Chris@184 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Chris@184 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Chris@184 121
Chris@184 122 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@184 123
Chris@184 124 C_ADDTO(*Fout,scratch[3]);
Chris@184 125
Chris@184 126 Fout[m2].r = Fout[m].r + scratch[0].i;
Chris@184 127 Fout[m2].i = Fout[m].i - scratch[0].r;
Chris@184 128
Chris@184 129 Fout[m].r -= scratch[0].i;
Chris@184 130 Fout[m].i += scratch[0].r;
Chris@184 131
Chris@184 132 ++Fout;
Chris@184 133 }while(--k);
Chris@184 134 }
Chris@184 135
Chris@184 136 static void kf_bfly5(
Chris@184 137 kiss_fft_cpx * Fout,
Chris@184 138 const size_t fstride,
Chris@184 139 const kiss_fft_cfg st,
Chris@184 140 int m
Chris@184 141 )
Chris@184 142 {
Chris@184 143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@184 144 int u;
Chris@184 145 kiss_fft_cpx scratch[13];
Chris@184 146 kiss_fft_cpx * twiddles = st->twiddles;
Chris@184 147 kiss_fft_cpx *tw;
Chris@184 148 kiss_fft_cpx ya,yb;
Chris@184 149 ya = twiddles[fstride*m];
Chris@184 150 yb = twiddles[fstride*2*m];
Chris@184 151
Chris@184 152 Fout0=Fout;
Chris@184 153 Fout1=Fout0+m;
Chris@184 154 Fout2=Fout0+2*m;
Chris@184 155 Fout3=Fout0+3*m;
Chris@184 156 Fout4=Fout0+4*m;
Chris@184 157
Chris@184 158 tw=st->twiddles;
Chris@184 159 for ( u=0; u<m; ++u ) {
Chris@184 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
Chris@184 161 scratch[0] = *Fout0;
Chris@184 162
Chris@184 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@184 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@184 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@184 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@184 167
Chris@184 168 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@184 169 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@184 170 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@184 171 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@184 172
Chris@184 173 Fout0->r += scratch[7].r + scratch[8].r;
Chris@184 174 Fout0->i += scratch[7].i + scratch[8].i;
Chris@184 175
Chris@184 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
Chris@184 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
Chris@184 178
Chris@184 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
Chris@184 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
Chris@184 181
Chris@184 182 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@184 183 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@184 184
Chris@184 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
Chris@184 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
Chris@184 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
Chris@184 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
Chris@184 189
Chris@184 190 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@184 191 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@184 192
Chris@184 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@184 194 }
Chris@184 195 }
Chris@184 196
Chris@184 197 /* perform the butterfly for one stage of a mixed radix FFT */
Chris@184 198 static void kf_bfly_generic(
Chris@184 199 kiss_fft_cpx * Fout,
Chris@184 200 const size_t fstride,
Chris@184 201 const kiss_fft_cfg st,
Chris@184 202 int m,
Chris@184 203 int p
Chris@184 204 )
Chris@184 205 {
Chris@184 206 int u,k,q1,q;
Chris@184 207 kiss_fft_cpx * twiddles = st->twiddles;
Chris@184 208 kiss_fft_cpx t;
Chris@184 209 int Norig = st->nfft;
Chris@184 210
Chris@184 211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
Chris@184 212
Chris@184 213 for ( u=0; u<m; ++u ) {
Chris@184 214 k=u;
Chris@184 215 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@184 216 scratch[q1] = Fout[ k ];
Chris@184 217 C_FIXDIV(scratch[q1],p);
Chris@184 218 k += m;
Chris@184 219 }
Chris@184 220
Chris@184 221 k=u;
Chris@184 222 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@184 223 int twidx=0;
Chris@184 224 Fout[ k ] = scratch[0];
Chris@184 225 for (q=1;q<p;++q ) {
Chris@184 226 twidx += fstride * k;
Chris@184 227 if (twidx>=Norig) twidx-=Norig;
Chris@184 228 C_MUL(t,scratch[q] , twiddles[twidx] );
Chris@184 229 C_ADDTO( Fout[ k ] ,t);
Chris@184 230 }
Chris@184 231 k += m;
Chris@184 232 }
Chris@184 233 }
Chris@184 234 KISS_FFT_TMP_FREE(scratch);
Chris@184 235 }
Chris@184 236
Chris@184 237 static
Chris@184 238 void kf_work(
Chris@184 239 kiss_fft_cpx * Fout,
Chris@184 240 const kiss_fft_cpx * f,
Chris@184 241 const size_t fstride,
Chris@184 242 int in_stride,
Chris@184 243 int * factors,
Chris@184 244 const kiss_fft_cfg st
Chris@184 245 )
Chris@184 246 {
Chris@184 247 kiss_fft_cpx * Fout_beg=Fout;
Chris@184 248 const int p=*factors++; /* the radix */
Chris@184 249 const int m=*factors++; /* stage's fft length/p */
Chris@184 250 const kiss_fft_cpx * Fout_end = Fout + p*m;
Chris@184 251
Chris@184 252 #ifdef _OPENMP
Chris@184 253 // use openmp extensions at the
Chris@184 254 // top-level (not recursive)
Chris@184 255 if (fstride==1 && p<=5)
Chris@184 256 {
Chris@184 257 int k;
Chris@184 258
Chris@184 259 // execute the p different work units in different threads
Chris@184 260 # pragma omp parallel for
Chris@184 261 for (k=0;k<p;++k)
Chris@184 262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
Chris@184 263 // all threads have joined by this point
Chris@184 264
Chris@184 265 switch (p) {
Chris@184 266 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@184 267 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@184 268 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@184 269 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@184 270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@184 271 }
Chris@184 272 return;
Chris@184 273 }
Chris@184 274 #endif
Chris@184 275
Chris@184 276 if (m==1) {
Chris@184 277 do{
Chris@207 278 Fout->r = f->r;
Chris@207 279 Fout->i = f->i;
Chris@184 280 f += fstride*in_stride;
Chris@184 281 }while(++Fout != Fout_end );
Chris@184 282 }else{
Chris@184 283 do{
Chris@184 284 // recursive call:
Chris@184 285 // DFT of size m*p performed by doing
Chris@184 286 // p instances of smaller DFTs of size m,
Chris@184 287 // each one takes a decimated version of the input
Chris@184 288 kf_work( Fout , f, fstride*p, in_stride, factors,st);
Chris@184 289 f += fstride*in_stride;
Chris@184 290 }while( (Fout += m) != Fout_end );
Chris@184 291 }
Chris@184 292
Chris@184 293 Fout=Fout_beg;
Chris@184 294
Chris@184 295 // recombine the p smaller DFTs
Chris@184 296 switch (p) {
Chris@184 297 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@184 298 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@184 299 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@184 300 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@184 301 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@184 302 }
Chris@184 303 }
Chris@184 304
Chris@184 305 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@184 306 where
Chris@184 307 p[i] * m[i] = m[i-1]
Chris@184 308 m0 = n */
Chris@184 309 static
Chris@184 310 void kf_factor(int n,int * facbuf)
Chris@184 311 {
Chris@184 312 int p=4;
Chris@184 313 double floor_sqrt;
Chris@184 314 floor_sqrt = floor( sqrt((double)n) );
Chris@184 315
Chris@184 316 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@184 317 do {
Chris@184 318 while (n % p) {
Chris@184 319 switch (p) {
Chris@184 320 case 4: p = 2; break;
Chris@184 321 case 2: p = 3; break;
Chris@184 322 default: p += 2; break;
Chris@184 323 }
Chris@184 324 if (p > floor_sqrt)
Chris@184 325 p = n; /* no more factors, skip to end */
Chris@184 326 }
Chris@184 327 n /= p;
Chris@184 328 *facbuf++ = p;
Chris@184 329 *facbuf++ = n;
Chris@184 330 } while (n > 1);
Chris@184 331 }
Chris@184 332
Chris@184 333 /*
Chris@184 334 *
Chris@184 335 * User-callable function to allocate all necessary storage space for the fft.
Chris@184 336 *
Chris@184 337 * The return value is a contiguous block of memory, allocated with malloc. As such,
Chris@184 338 * It can be freed with free(), rather than a kiss_fft-specific function.
Chris@184 339 * */
Chris@184 340 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
Chris@184 341 {
Chris@184 342 kiss_fft_cfg st=NULL;
Chris@184 343 size_t memneeded = sizeof(struct kiss_fft_state)
Chris@184 344 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
Chris@184 345
Chris@184 346 if ( lenmem==NULL ) {
Chris@184 347 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
Chris@184 348 }else{
Chris@184 349 if (mem != NULL && *lenmem >= memneeded)
Chris@184 350 st = (kiss_fft_cfg)mem;
Chris@184 351 *lenmem = memneeded;
Chris@184 352 }
Chris@184 353 if (st) {
Chris@184 354 int i;
Chris@184 355 st->nfft=nfft;
Chris@184 356 st->inverse = inverse_fft;
Chris@184 357
Chris@184 358 for (i=0;i<nfft;++i) {
Chris@184 359 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
Chris@184 360 double phase = -2*pi*i / nfft;
Chris@184 361 if (st->inverse)
Chris@184 362 phase *= -1;
Chris@184 363 kf_cexp(st->twiddles+i, phase );
Chris@184 364 }
Chris@184 365
Chris@184 366 kf_factor(nfft,st->factors);
Chris@184 367 }
Chris@184 368 return st;
Chris@184 369 }
Chris@184 370
Chris@184 371
Chris@184 372 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
Chris@184 373 {
Chris@184 374 if (fin == fout) {
Chris@184 375 //NOTE: this is not really an in-place FFT algorithm.
Chris@184 376 //It just performs an out-of-place FFT into a temp buffer
Chris@184 377 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
Chris@184 378 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
Chris@184 379 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
Chris@184 380 KISS_FFT_TMP_FREE(tmpbuf);
Chris@184 381 }else{
Chris@184 382 kf_work( fout, fin, 1,in_stride, st->factors,st );
Chris@184 383 }
Chris@184 384 }
Chris@184 385
Chris@184 386 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@184 387 {
Chris@184 388 kiss_fft_stride(cfg,fin,fout,1);
Chris@184 389 }
Chris@184 390
Chris@184 391
Chris@184 392 void kiss_fft_cleanup(void)
Chris@184 393 {
Chris@184 394 // nothing needed any more
Chris@184 395 }
Chris@184 396
Chris@184 397 int kiss_fft_next_fast_size(int n)
Chris@184 398 {
Chris@184 399 while(1) {
Chris@184 400 int m=n;
Chris@184 401 while ( (m%2) == 0 ) m/=2;
Chris@184 402 while ( (m%3) == 0 ) m/=3;
Chris@184 403 while ( (m%5) == 0 ) m/=5;
Chris@184 404 if (m<=1)
Chris@184 405 break; /* n is completely factorable by twos, threes, and fives */
Chris@184 406 n++;
Chris@184 407 }
Chris@184 408 return n;
Chris@184 409 }