annotate constant-q-cpp/src/ext/kissfft/kiss_fft.c @ 372:af71cbdab621 tip

Update bqvec code
author Chris Cannam
date Tue, 19 Nov 2019 10:13:32 +0000
parents 5d0a2ebb4d17
children
rev   line source
Chris@366 1 /*
Chris@366 2 Copyright (c) 2003-2010, Mark Borgerding
Chris@366 3
Chris@366 4 All rights reserved.
Chris@366 5
Chris@366 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Chris@366 7
Chris@366 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Chris@366 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Chris@366 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
Chris@366 11
Chris@366 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@366 13 */
Chris@366 14
Chris@366 15
Chris@366 16 #include "_kiss_fft_guts.h"
Chris@366 17 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@366 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
Chris@366 19 */
Chris@366 20
Chris@366 21 static void kf_bfly2(
Chris@366 22 kiss_fft_cpx * Fout,
Chris@366 23 const size_t fstride,
Chris@366 24 const kiss_fft_cfg st,
Chris@366 25 int m
Chris@366 26 )
Chris@366 27 {
Chris@366 28 kiss_fft_cpx * Fout2;
Chris@366 29 kiss_fft_cpx * tw1 = st->twiddles;
Chris@366 30 kiss_fft_cpx t;
Chris@366 31 Fout2 = Fout + m;
Chris@366 32 do{
Chris@366 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
Chris@366 34
Chris@366 35 C_MUL (t, *Fout2 , *tw1);
Chris@366 36 tw1 += fstride;
Chris@366 37 C_SUB( *Fout2 , *Fout , t );
Chris@366 38 C_ADDTO( *Fout , t );
Chris@366 39 ++Fout2;
Chris@366 40 ++Fout;
Chris@366 41 }while (--m);
Chris@366 42 }
Chris@366 43
Chris@366 44 static void kf_bfly4(
Chris@366 45 kiss_fft_cpx * Fout,
Chris@366 46 const size_t fstride,
Chris@366 47 const kiss_fft_cfg st,
Chris@366 48 const size_t m
Chris@366 49 )
Chris@366 50 {
Chris@366 51 kiss_fft_cpx *tw1,*tw2,*tw3;
Chris@366 52 kiss_fft_cpx scratch[6];
Chris@366 53 size_t k=m;
Chris@366 54 const size_t m2=2*m;
Chris@366 55 const size_t m3=3*m;
Chris@366 56
Chris@366 57
Chris@366 58 tw3 = tw2 = tw1 = st->twiddles;
Chris@366 59
Chris@366 60 do {
Chris@366 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
Chris@366 62
Chris@366 63 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@366 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@366 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@366 66
Chris@366 67 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@366 68 C_ADDTO(*Fout, scratch[1]);
Chris@366 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@366 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@366 71 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@366 72 tw1 += fstride;
Chris@366 73 tw2 += fstride*2;
Chris@366 74 tw3 += fstride*3;
Chris@366 75 C_ADDTO( *Fout , scratch[3] );
Chris@366 76
Chris@366 77 if(st->inverse) {
Chris@366 78 Fout[m].r = scratch[5].r - scratch[4].i;
Chris@366 79 Fout[m].i = scratch[5].i + scratch[4].r;
Chris@366 80 Fout[m3].r = scratch[5].r + scratch[4].i;
Chris@366 81 Fout[m3].i = scratch[5].i - scratch[4].r;
Chris@366 82 }else{
Chris@366 83 Fout[m].r = scratch[5].r + scratch[4].i;
Chris@366 84 Fout[m].i = scratch[5].i - scratch[4].r;
Chris@366 85 Fout[m3].r = scratch[5].r - scratch[4].i;
Chris@366 86 Fout[m3].i = scratch[5].i + scratch[4].r;
Chris@366 87 }
Chris@366 88 ++Fout;
Chris@366 89 }while(--k);
Chris@366 90 }
Chris@366 91
Chris@366 92 static void kf_bfly3(
Chris@366 93 kiss_fft_cpx * Fout,
Chris@366 94 const size_t fstride,
Chris@366 95 const kiss_fft_cfg st,
Chris@366 96 size_t m
Chris@366 97 )
Chris@366 98 {
Chris@366 99 size_t k=m;
Chris@366 100 const size_t m2 = 2*m;
Chris@366 101 kiss_fft_cpx *tw1,*tw2;
Chris@366 102 kiss_fft_cpx scratch[5];
Chris@366 103 kiss_fft_cpx epi3;
Chris@366 104 epi3 = st->twiddles[fstride*m];
Chris@366 105
Chris@366 106 tw1=tw2=st->twiddles;
Chris@366 107
Chris@366 108 do{
Chris@366 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
Chris@366 110
Chris@366 111 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@366 112 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@366 113
Chris@366 114 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@366 115 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@366 116 tw1 += fstride;
Chris@366 117 tw2 += fstride*2;
Chris@366 118
Chris@366 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Chris@366 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Chris@366 121
Chris@366 122 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@366 123
Chris@366 124 C_ADDTO(*Fout,scratch[3]);
Chris@366 125
Chris@366 126 Fout[m2].r = Fout[m].r + scratch[0].i;
Chris@366 127 Fout[m2].i = Fout[m].i - scratch[0].r;
Chris@366 128
Chris@366 129 Fout[m].r -= scratch[0].i;
Chris@366 130 Fout[m].i += scratch[0].r;
Chris@366 131
Chris@366 132 ++Fout;
Chris@366 133 }while(--k);
Chris@366 134 }
Chris@366 135
Chris@366 136 static void kf_bfly5(
Chris@366 137 kiss_fft_cpx * Fout,
Chris@366 138 const size_t fstride,
Chris@366 139 const kiss_fft_cfg st,
Chris@366 140 int m
Chris@366 141 )
Chris@366 142 {
Chris@366 143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@366 144 int u;
Chris@366 145 kiss_fft_cpx scratch[13];
Chris@366 146 kiss_fft_cpx * twiddles = st->twiddles;
Chris@366 147 kiss_fft_cpx *tw;
Chris@366 148 kiss_fft_cpx ya,yb;
Chris@366 149 ya = twiddles[fstride*m];
Chris@366 150 yb = twiddles[fstride*2*m];
Chris@366 151
Chris@366 152 Fout0=Fout;
Chris@366 153 Fout1=Fout0+m;
Chris@366 154 Fout2=Fout0+2*m;
Chris@366 155 Fout3=Fout0+3*m;
Chris@366 156 Fout4=Fout0+4*m;
Chris@366 157
Chris@366 158 tw=st->twiddles;
Chris@366 159 for ( u=0; u<m; ++u ) {
Chris@366 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
Chris@366 161 scratch[0] = *Fout0;
Chris@366 162
Chris@366 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@366 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@366 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@366 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@366 167
Chris@366 168 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@366 169 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@366 170 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@366 171 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@366 172
Chris@366 173 Fout0->r += scratch[7].r + scratch[8].r;
Chris@366 174 Fout0->i += scratch[7].i + scratch[8].i;
Chris@366 175
Chris@366 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
Chris@366 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
Chris@366 178
Chris@366 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
Chris@366 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
Chris@366 181
Chris@366 182 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@366 183 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@366 184
Chris@366 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
Chris@366 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
Chris@366 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
Chris@366 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
Chris@366 189
Chris@366 190 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@366 191 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@366 192
Chris@366 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@366 194 }
Chris@366 195 }
Chris@366 196
Chris@366 197 /* perform the butterfly for one stage of a mixed radix FFT */
Chris@366 198 static void kf_bfly_generic(
Chris@366 199 kiss_fft_cpx * Fout,
Chris@366 200 const size_t fstride,
Chris@366 201 const kiss_fft_cfg st,
Chris@366 202 int m,
Chris@366 203 int p
Chris@366 204 )
Chris@366 205 {
Chris@366 206 int u,k,q1,q;
Chris@366 207 kiss_fft_cpx * twiddles = st->twiddles;
Chris@366 208 kiss_fft_cpx t;
Chris@366 209 int Norig = st->nfft;
Chris@366 210
Chris@366 211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
Chris@366 212
Chris@366 213 for ( u=0; u<m; ++u ) {
Chris@366 214 k=u;
Chris@366 215 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@366 216 scratch[q1] = Fout[ k ];
Chris@366 217 C_FIXDIV(scratch[q1],p);
Chris@366 218 k += m;
Chris@366 219 }
Chris@366 220
Chris@366 221 k=u;
Chris@366 222 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@366 223 int twidx=0;
Chris@366 224 Fout[ k ] = scratch[0];
Chris@366 225 for (q=1;q<p;++q ) {
Chris@366 226 twidx += fstride * k;
Chris@366 227 if (twidx>=Norig) twidx-=Norig;
Chris@366 228 C_MUL(t,scratch[q] , twiddles[twidx] );
Chris@366 229 C_ADDTO( Fout[ k ] ,t);
Chris@366 230 }
Chris@366 231 k += m;
Chris@366 232 }
Chris@366 233 }
Chris@366 234 KISS_FFT_TMP_FREE(scratch);
Chris@366 235 }
Chris@366 236
Chris@366 237 static
Chris@366 238 void kf_work(
Chris@366 239 kiss_fft_cpx * Fout,
Chris@366 240 const kiss_fft_cpx * f,
Chris@366 241 const size_t fstride,
Chris@366 242 int in_stride,
Chris@366 243 int * factors,
Chris@366 244 const kiss_fft_cfg st
Chris@366 245 )
Chris@366 246 {
Chris@366 247 kiss_fft_cpx * Fout_beg=Fout;
Chris@366 248 const int p=*factors++; /* the radix */
Chris@366 249 const int m=*factors++; /* stage's fft length/p */
Chris@366 250 const kiss_fft_cpx * Fout_end = Fout + p*m;
Chris@366 251
Chris@366 252 #ifdef _OPENMP
Chris@366 253 // use openmp extensions at the
Chris@366 254 // top-level (not recursive)
Chris@366 255 if (fstride==1 && p<=5)
Chris@366 256 {
Chris@366 257 int k;
Chris@366 258
Chris@366 259 // execute the p different work units in different threads
Chris@366 260 # pragma omp parallel for
Chris@366 261 for (k=0;k<p;++k)
Chris@366 262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
Chris@366 263 // all threads have joined by this point
Chris@366 264
Chris@366 265 switch (p) {
Chris@366 266 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@366 267 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@366 268 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@366 269 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@366 270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@366 271 }
Chris@366 272 return;
Chris@366 273 }
Chris@366 274 #endif
Chris@366 275
Chris@366 276 if (m==1) {
Chris@366 277 do{
Chris@366 278 *Fout = *f;
Chris@366 279 f += fstride*in_stride;
Chris@366 280 }while(++Fout != Fout_end );
Chris@366 281 }else{
Chris@366 282 do{
Chris@366 283 // recursive call:
Chris@366 284 // DFT of size m*p performed by doing
Chris@366 285 // p instances of smaller DFTs of size m,
Chris@366 286 // each one takes a decimated version of the input
Chris@366 287 kf_work( Fout , f, fstride*p, in_stride, factors,st);
Chris@366 288 f += fstride*in_stride;
Chris@366 289 }while( (Fout += m) != Fout_end );
Chris@366 290 }
Chris@366 291
Chris@366 292 Fout=Fout_beg;
Chris@366 293
Chris@366 294 // recombine the p smaller DFTs
Chris@366 295 switch (p) {
Chris@366 296 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@366 297 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@366 298 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@366 299 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@366 300 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@366 301 }
Chris@366 302 }
Chris@366 303
Chris@366 304 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@366 305 where
Chris@366 306 p[i] * m[i] = m[i-1]
Chris@366 307 m0 = n */
Chris@366 308 static
Chris@366 309 void kf_factor(int n,int * facbuf)
Chris@366 310 {
Chris@366 311 int p=4;
Chris@366 312 double floor_sqrt;
Chris@366 313 floor_sqrt = floor( sqrt((double)n) );
Chris@366 314
Chris@366 315 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@366 316 do {
Chris@366 317 while (n % p) {
Chris@366 318 switch (p) {
Chris@366 319 case 4: p = 2; break;
Chris@366 320 case 2: p = 3; break;
Chris@366 321 default: p += 2; break;
Chris@366 322 }
Chris@366 323 if (p > floor_sqrt)
Chris@366 324 p = n; /* no more factors, skip to end */
Chris@366 325 }
Chris@366 326 n /= p;
Chris@366 327 *facbuf++ = p;
Chris@366 328 *facbuf++ = n;
Chris@366 329 } while (n > 1);
Chris@366 330 }
Chris@366 331
Chris@366 332 /*
Chris@366 333 *
Chris@366 334 * User-callable function to allocate all necessary storage space for the fft.
Chris@366 335 *
Chris@366 336 * The return value is a contiguous block of memory, allocated with malloc. As such,
Chris@366 337 * It can be freed with free(), rather than a kiss_fft-specific function.
Chris@366 338 * */
Chris@366 339 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
Chris@366 340 {
Chris@366 341 kiss_fft_cfg st=NULL;
Chris@366 342 size_t memneeded = sizeof(struct kiss_fft_state)
Chris@366 343 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
Chris@366 344
Chris@366 345 if ( lenmem==NULL ) {
Chris@366 346 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
Chris@366 347 }else{
Chris@366 348 if (mem != NULL && *lenmem >= memneeded)
Chris@366 349 st = (kiss_fft_cfg)mem;
Chris@366 350 *lenmem = memneeded;
Chris@366 351 }
Chris@366 352 if (st) {
Chris@366 353 int i;
Chris@366 354 st->nfft=nfft;
Chris@366 355 st->inverse = inverse_fft;
Chris@366 356
Chris@366 357 for (i=0;i<nfft;++i) {
Chris@366 358 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
Chris@366 359 double phase = -2*pi*i / nfft;
Chris@366 360 if (st->inverse)
Chris@366 361 phase *= -1;
Chris@366 362 kf_cexp(st->twiddles+i, phase );
Chris@366 363 }
Chris@366 364
Chris@366 365 kf_factor(nfft,st->factors);
Chris@366 366 }
Chris@366 367 return st;
Chris@366 368 }
Chris@366 369
Chris@366 370
Chris@366 371 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
Chris@366 372 {
Chris@366 373 if (fin == fout) {
Chris@366 374 //NOTE: this is not really an in-place FFT algorithm.
Chris@366 375 //It just performs an out-of-place FFT into a temp buffer
Chris@366 376 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
Chris@366 377 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
Chris@366 378 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
Chris@366 379 KISS_FFT_TMP_FREE(tmpbuf);
Chris@366 380 }else{
Chris@366 381 kf_work( fout, fin, 1,in_stride, st->factors,st );
Chris@366 382 }
Chris@366 383 }
Chris@366 384
Chris@366 385 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@366 386 {
Chris@366 387 kiss_fft_stride(cfg,fin,fout,1);
Chris@366 388 }
Chris@366 389
Chris@366 390
Chris@366 391 void kiss_fft_cleanup(void)
Chris@366 392 {
Chris@366 393 // nothing needed any more
Chris@366 394 }
Chris@366 395
Chris@366 396 int kiss_fft_next_fast_size(int n)
Chris@366 397 {
Chris@366 398 while(1) {
Chris@366 399 int m=n;
Chris@366 400 while ( (m%2) == 0 ) m/=2;
Chris@366 401 while ( (m%3) == 0 ) m/=3;
Chris@366 402 while ( (m%5) == 0 ) m/=5;
Chris@366 403 if (m<=1)
Chris@366 404 break; /* n is completely factorable by twos, threes, and fives */
Chris@366 405 n++;
Chris@366 406 }
Chris@366 407 return n;
Chris@366 408 }