annotate src/ext/kissfft/kiss_fft.c @ 196:da283326bcd3 tip master

Update plugin versions in RDF
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 28 Feb 2020 09:43:02 +0000
parents c94c81d9a5aa
children
rev   line source
c@174 1 /*
c@174 2 Copyright (c) 2003-2010, Mark Borgerding
c@174 3
c@174 4 All rights reserved.
c@174 5
c@174 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
c@174 7
c@174 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
c@174 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
c@174 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
c@174 11
c@174 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
c@174 13 */
c@174 14
c@174 15
c@174 16 #include "_kiss_fft_guts.h"
c@174 17 /* The guts header contains all the multiplication and addition macros that are defined for
c@174 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
c@174 19 */
c@174 20
c@174 21 static void kf_bfly2(
c@174 22 kiss_fft_cpx * Fout,
c@174 23 const size_t fstride,
c@174 24 const kiss_fft_cfg st,
c@174 25 int m
c@174 26 )
c@174 27 {
c@174 28 kiss_fft_cpx * Fout2;
c@174 29 kiss_fft_cpx * tw1 = st->twiddles;
c@174 30 kiss_fft_cpx t;
c@174 31 Fout2 = Fout + m;
c@174 32 do{
c@174 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
c@174 34
c@174 35 C_MUL (t, *Fout2 , *tw1);
c@174 36 tw1 += fstride;
c@174 37 C_SUB( *Fout2 , *Fout , t );
c@174 38 C_ADDTO( *Fout , t );
c@174 39 ++Fout2;
c@174 40 ++Fout;
c@174 41 }while (--m);
c@174 42 }
c@174 43
c@174 44 static void kf_bfly4(
c@174 45 kiss_fft_cpx * Fout,
c@174 46 const size_t fstride,
c@174 47 const kiss_fft_cfg st,
c@174 48 const size_t m
c@174 49 )
c@174 50 {
c@174 51 kiss_fft_cpx *tw1,*tw2,*tw3;
c@174 52 kiss_fft_cpx scratch[6];
c@174 53 size_t k=m;
c@174 54 const size_t m2=2*m;
c@174 55 const size_t m3=3*m;
c@174 56
c@174 57
c@174 58 tw3 = tw2 = tw1 = st->twiddles;
c@174 59
c@174 60 do {
c@174 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
c@174 62
c@174 63 C_MUL(scratch[0],Fout[m] , *tw1 );
c@174 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
c@174 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
c@174 66
c@174 67 C_SUB( scratch[5] , *Fout, scratch[1] );
c@174 68 C_ADDTO(*Fout, scratch[1]);
c@174 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
c@174 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
c@174 71 C_SUB( Fout[m2], *Fout, scratch[3] );
c@174 72 tw1 += fstride;
c@174 73 tw2 += fstride*2;
c@174 74 tw3 += fstride*3;
c@174 75 C_ADDTO( *Fout , scratch[3] );
c@174 76
c@174 77 if(st->inverse) {
c@174 78 Fout[m].r = scratch[5].r - scratch[4].i;
c@174 79 Fout[m].i = scratch[5].i + scratch[4].r;
c@174 80 Fout[m3].r = scratch[5].r + scratch[4].i;
c@174 81 Fout[m3].i = scratch[5].i - scratch[4].r;
c@174 82 }else{
c@174 83 Fout[m].r = scratch[5].r + scratch[4].i;
c@174 84 Fout[m].i = scratch[5].i - scratch[4].r;
c@174 85 Fout[m3].r = scratch[5].r - scratch[4].i;
c@174 86 Fout[m3].i = scratch[5].i + scratch[4].r;
c@174 87 }
c@174 88 ++Fout;
c@174 89 }while(--k);
c@174 90 }
c@174 91
c@174 92 static void kf_bfly3(
c@174 93 kiss_fft_cpx * Fout,
c@174 94 const size_t fstride,
c@174 95 const kiss_fft_cfg st,
c@174 96 size_t m
c@174 97 )
c@174 98 {
c@174 99 size_t k=m;
c@174 100 const size_t m2 = 2*m;
c@174 101 kiss_fft_cpx *tw1,*tw2;
c@174 102 kiss_fft_cpx scratch[5];
c@174 103 kiss_fft_cpx epi3;
c@174 104 epi3 = st->twiddles[fstride*m];
c@174 105
c@174 106 tw1=tw2=st->twiddles;
c@174 107
c@174 108 do{
c@174 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
c@174 110
c@174 111 C_MUL(scratch[1],Fout[m] , *tw1);
c@174 112 C_MUL(scratch[2],Fout[m2] , *tw2);
c@174 113
c@174 114 C_ADD(scratch[3],scratch[1],scratch[2]);
c@174 115 C_SUB(scratch[0],scratch[1],scratch[2]);
c@174 116 tw1 += fstride;
c@174 117 tw2 += fstride*2;
c@174 118
c@174 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
c@174 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
c@174 121
c@174 122 C_MULBYSCALAR( scratch[0] , epi3.i );
c@174 123
c@174 124 C_ADDTO(*Fout,scratch[3]);
c@174 125
c@174 126 Fout[m2].r = Fout[m].r + scratch[0].i;
c@174 127 Fout[m2].i = Fout[m].i - scratch[0].r;
c@174 128
c@174 129 Fout[m].r -= scratch[0].i;
c@174 130 Fout[m].i += scratch[0].r;
c@174 131
c@174 132 ++Fout;
c@174 133 }while(--k);
c@174 134 }
c@174 135
c@174 136 static void kf_bfly5(
c@174 137 kiss_fft_cpx * Fout,
c@174 138 const size_t fstride,
c@174 139 const kiss_fft_cfg st,
c@174 140 int m
c@174 141 )
c@174 142 {
c@174 143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
c@174 144 int u;
c@174 145 kiss_fft_cpx scratch[13];
c@174 146 kiss_fft_cpx * twiddles = st->twiddles;
c@174 147 kiss_fft_cpx *tw;
c@174 148 kiss_fft_cpx ya,yb;
c@174 149 ya = twiddles[fstride*m];
c@174 150 yb = twiddles[fstride*2*m];
c@174 151
c@174 152 Fout0=Fout;
c@174 153 Fout1=Fout0+m;
c@174 154 Fout2=Fout0+2*m;
c@174 155 Fout3=Fout0+3*m;
c@174 156 Fout4=Fout0+4*m;
c@174 157
c@174 158 tw=st->twiddles;
c@174 159 for ( u=0; u<m; ++u ) {
c@174 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
c@174 161 scratch[0] = *Fout0;
c@174 162
c@174 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
c@174 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
c@174 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
c@174 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
c@174 167
c@174 168 C_ADD( scratch[7],scratch[1],scratch[4]);
c@174 169 C_SUB( scratch[10],scratch[1],scratch[4]);
c@174 170 C_ADD( scratch[8],scratch[2],scratch[3]);
c@174 171 C_SUB( scratch[9],scratch[2],scratch[3]);
c@174 172
c@174 173 Fout0->r += scratch[7].r + scratch[8].r;
c@174 174 Fout0->i += scratch[7].i + scratch[8].i;
c@174 175
c@174 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
c@174 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
c@174 178
c@174 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
c@174 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
c@174 181
c@174 182 C_SUB(*Fout1,scratch[5],scratch[6]);
c@174 183 C_ADD(*Fout4,scratch[5],scratch[6]);
c@174 184
c@174 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
c@174 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
c@174 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
c@174 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
c@174 189
c@174 190 C_ADD(*Fout2,scratch[11],scratch[12]);
c@174 191 C_SUB(*Fout3,scratch[11],scratch[12]);
c@174 192
c@174 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
c@174 194 }
c@174 195 }
c@174 196
c@174 197 /* perform the butterfly for one stage of a mixed radix FFT */
c@174 198 static void kf_bfly_generic(
c@174 199 kiss_fft_cpx * Fout,
c@174 200 const size_t fstride,
c@174 201 const kiss_fft_cfg st,
c@174 202 int m,
c@174 203 int p
c@174 204 )
c@174 205 {
c@174 206 int u,k,q1,q;
c@174 207 kiss_fft_cpx * twiddles = st->twiddles;
c@174 208 kiss_fft_cpx t;
c@174 209 int Norig = st->nfft;
c@174 210
c@174 211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
c@174 212
c@174 213 for ( u=0; u<m; ++u ) {
c@174 214 k=u;
c@174 215 for ( q1=0 ; q1<p ; ++q1 ) {
c@174 216 scratch[q1] = Fout[ k ];
c@174 217 C_FIXDIV(scratch[q1],p);
c@174 218 k += m;
c@174 219 }
c@174 220
c@174 221 k=u;
c@174 222 for ( q1=0 ; q1<p ; ++q1 ) {
c@174 223 int twidx=0;
c@174 224 Fout[ k ] = scratch[0];
c@174 225 for (q=1;q<p;++q ) {
c@174 226 twidx += fstride * k;
c@174 227 if (twidx>=Norig) twidx-=Norig;
c@174 228 C_MUL(t,scratch[q] , twiddles[twidx] );
c@174 229 C_ADDTO( Fout[ k ] ,t);
c@174 230 }
c@174 231 k += m;
c@174 232 }
c@174 233 }
c@174 234 KISS_FFT_TMP_FREE(scratch);
c@174 235 }
c@174 236
c@174 237 static
c@174 238 void kf_work(
c@174 239 kiss_fft_cpx * Fout,
c@174 240 const kiss_fft_cpx * f,
c@174 241 const size_t fstride,
c@174 242 int in_stride,
c@174 243 int * factors,
c@174 244 const kiss_fft_cfg st
c@174 245 )
c@174 246 {
c@174 247 kiss_fft_cpx * Fout_beg=Fout;
c@174 248 const int p=*factors++; /* the radix */
c@174 249 const int m=*factors++; /* stage's fft length/p */
c@174 250 const kiss_fft_cpx * Fout_end = Fout + p*m;
c@174 251
c@174 252 #ifdef _OPENMP
c@174 253 // use openmp extensions at the
c@174 254 // top-level (not recursive)
c@174 255 if (fstride==1 && p<=5)
c@174 256 {
c@174 257 int k;
c@174 258
c@174 259 // execute the p different work units in different threads
c@174 260 # pragma omp parallel for
c@174 261 for (k=0;k<p;++k)
c@174 262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
c@174 263 // all threads have joined by this point
c@174 264
c@174 265 switch (p) {
c@174 266 case 2: kf_bfly2(Fout,fstride,st,m); break;
c@174 267 case 3: kf_bfly3(Fout,fstride,st,m); break;
c@174 268 case 4: kf_bfly4(Fout,fstride,st,m); break;
c@174 269 case 5: kf_bfly5(Fout,fstride,st,m); break;
c@174 270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
c@174 271 }
c@174 272 return;
c@174 273 }
c@174 274 #endif
c@174 275
c@174 276 if (m==1) {
c@174 277 do{
c@189 278 Fout->r = f->r;
c@189 279 Fout->i = f->i;
c@174 280 f += fstride*in_stride;
c@174 281 }while(++Fout != Fout_end );
c@174 282 }else{
c@174 283 do{
c@174 284 // recursive call:
c@174 285 // DFT of size m*p performed by doing
c@174 286 // p instances of smaller DFTs of size m,
c@174 287 // each one takes a decimated version of the input
c@174 288 kf_work( Fout , f, fstride*p, in_stride, factors,st);
c@174 289 f += fstride*in_stride;
c@174 290 }while( (Fout += m) != Fout_end );
c@174 291 }
c@174 292
c@174 293 Fout=Fout_beg;
c@174 294
c@174 295 // recombine the p smaller DFTs
c@174 296 switch (p) {
c@174 297 case 2: kf_bfly2(Fout,fstride,st,m); break;
c@174 298 case 3: kf_bfly3(Fout,fstride,st,m); break;
c@174 299 case 4: kf_bfly4(Fout,fstride,st,m); break;
c@174 300 case 5: kf_bfly5(Fout,fstride,st,m); break;
c@174 301 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
c@174 302 }
c@174 303 }
c@174 304
c@174 305 /* facbuf is populated by p1,m1,p2,m2, ...
c@174 306 where
c@174 307 p[i] * m[i] = m[i-1]
c@174 308 m0 = n */
c@174 309 static
c@174 310 void kf_factor(int n,int * facbuf)
c@174 311 {
c@174 312 int p=4;
c@174 313 double floor_sqrt;
c@174 314 floor_sqrt = floor( sqrt((double)n) );
c@174 315
c@174 316 /*factor out powers of 4, powers of 2, then any remaining primes */
c@174 317 do {
c@174 318 while (n % p) {
c@174 319 switch (p) {
c@174 320 case 4: p = 2; break;
c@174 321 case 2: p = 3; break;
c@174 322 default: p += 2; break;
c@174 323 }
c@174 324 if (p > floor_sqrt)
c@174 325 p = n; /* no more factors, skip to end */
c@174 326 }
c@174 327 n /= p;
c@174 328 *facbuf++ = p;
c@174 329 *facbuf++ = n;
c@174 330 } while (n > 1);
c@174 331 }
c@174 332
c@174 333 /*
c@174 334 *
c@174 335 * User-callable function to allocate all necessary storage space for the fft.
c@174 336 *
c@174 337 * The return value is a contiguous block of memory, allocated with malloc. As such,
c@174 338 * It can be freed with free(), rather than a kiss_fft-specific function.
c@174 339 * */
c@174 340 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
c@174 341 {
c@174 342 kiss_fft_cfg st=NULL;
c@174 343 size_t memneeded = sizeof(struct kiss_fft_state)
c@174 344 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
c@174 345
c@174 346 if ( lenmem==NULL ) {
c@174 347 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
c@174 348 }else{
c@174 349 if (mem != NULL && *lenmem >= memneeded)
c@174 350 st = (kiss_fft_cfg)mem;
c@174 351 *lenmem = memneeded;
c@174 352 }
c@174 353 if (st) {
c@174 354 int i;
c@174 355 st->nfft=nfft;
c@174 356 st->inverse = inverse_fft;
c@174 357
c@174 358 for (i=0;i<nfft;++i) {
c@174 359 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
c@174 360 double phase = -2*pi*i / nfft;
c@174 361 if (st->inverse)
c@174 362 phase *= -1;
c@174 363 kf_cexp(st->twiddles+i, phase );
c@174 364 }
c@174 365
c@174 366 kf_factor(nfft,st->factors);
c@174 367 }
c@174 368 return st;
c@174 369 }
c@174 370
c@174 371
c@174 372 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
c@174 373 {
c@174 374 if (fin == fout) {
c@174 375 //NOTE: this is not really an in-place FFT algorithm.
c@174 376 //It just performs an out-of-place FFT into a temp buffer
c@174 377 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
c@174 378 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
c@174 379 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
c@174 380 KISS_FFT_TMP_FREE(tmpbuf);
c@174 381 }else{
c@174 382 kf_work( fout, fin, 1,in_stride, st->factors,st );
c@174 383 }
c@174 384 }
c@174 385
c@174 386 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
c@174 387 {
c@174 388 kiss_fft_stride(cfg,fin,fout,1);
c@174 389 }
c@174 390
c@174 391
c@174 392 void kiss_fft_cleanup(void)
c@174 393 {
c@174 394 // nothing needed any more
c@174 395 }
c@174 396
c@174 397 int kiss_fft_next_fast_size(int n)
c@174 398 {
c@174 399 while(1) {
c@174 400 int m=n;
c@174 401 while ( (m%2) == 0 ) m/=2;
c@174 402 while ( (m%3) == 0 ) m/=3;
c@174 403 while ( (m%5) == 0 ) m/=5;
c@174 404 if (m<=1)
c@174 405 break; /* n is completely factorable by twos, threes, and fives */
c@174 406 n++;
c@174 407 }
c@174 408 return n;
c@174 409 }