annotate src/vamp-sdk/ext/vamp_kiss_fft.c @ 521:62987b6d6a3b

Looks like choco currently has a broken version of wget; use an older one
author Chris Cannam
date Thu, 16 May 2019 12:45:08 +0100
parents 90571dcc371a
children
rev   line source
Chris@434 1 /*
Chris@434 2 Copyright (c) 2003-2010, Mark Borgerding
Chris@434 3
Chris@434 4 All rights reserved.
Chris@434 5
Chris@434 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Chris@434 7
Chris@434 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Chris@434 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Chris@434 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
Chris@434 11
Chris@434 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@434 13 */
Chris@434 14
Chris@434 15
Chris@501 16 #include "vamp_kiss_fft_guts.h"
Chris@434 17 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@434 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
Chris@434 19 */
Chris@434 20
Chris@434 21 static void kf_bfly2(
Chris@501 22 vamp_kiss_fft_cpx * Fout,
Chris@434 23 const size_t fstride,
Chris@501 24 const vamp_kiss_fft_cfg st,
Chris@434 25 int m
Chris@434 26 )
Chris@434 27 {
Chris@501 28 vamp_kiss_fft_cpx * Fout2;
Chris@501 29 vamp_kiss_fft_cpx * tw1 = st->twiddles;
Chris@501 30 vamp_kiss_fft_cpx t;
Chris@434 31 Fout2 = Fout + m;
Chris@434 32 do{
Chris@434 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
Chris@434 34
Chris@434 35 C_MUL (t, *Fout2 , *tw1);
Chris@434 36 tw1 += fstride;
Chris@434 37 C_SUB( *Fout2 , *Fout , t );
Chris@434 38 C_ADDTO( *Fout , t );
Chris@434 39 ++Fout2;
Chris@434 40 ++Fout;
Chris@434 41 }while (--m);
Chris@434 42 }
Chris@434 43
Chris@434 44 static void kf_bfly4(
Chris@501 45 vamp_kiss_fft_cpx * Fout,
Chris@434 46 const size_t fstride,
Chris@501 47 const vamp_kiss_fft_cfg st,
Chris@434 48 const size_t m
Chris@434 49 )
Chris@434 50 {
Chris@501 51 vamp_kiss_fft_cpx *tw1,*tw2,*tw3;
Chris@501 52 vamp_kiss_fft_cpx scratch[6];
Chris@434 53 size_t k=m;
Chris@434 54 const size_t m2=2*m;
Chris@434 55 const size_t m3=3*m;
Chris@434 56
Chris@434 57
Chris@434 58 tw3 = tw2 = tw1 = st->twiddles;
Chris@434 59
Chris@434 60 do {
Chris@434 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
Chris@434 62
Chris@434 63 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@434 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@434 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@434 66
Chris@434 67 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@434 68 C_ADDTO(*Fout, scratch[1]);
Chris@434 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@434 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@434 71 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@434 72 tw1 += fstride;
Chris@434 73 tw2 += fstride*2;
Chris@434 74 tw3 += fstride*3;
Chris@434 75 C_ADDTO( *Fout , scratch[3] );
Chris@434 76
Chris@434 77 if(st->inverse) {
Chris@434 78 Fout[m].r = scratch[5].r - scratch[4].i;
Chris@434 79 Fout[m].i = scratch[5].i + scratch[4].r;
Chris@434 80 Fout[m3].r = scratch[5].r + scratch[4].i;
Chris@434 81 Fout[m3].i = scratch[5].i - scratch[4].r;
Chris@434 82 }else{
Chris@434 83 Fout[m].r = scratch[5].r + scratch[4].i;
Chris@434 84 Fout[m].i = scratch[5].i - scratch[4].r;
Chris@434 85 Fout[m3].r = scratch[5].r - scratch[4].i;
Chris@434 86 Fout[m3].i = scratch[5].i + scratch[4].r;
Chris@434 87 }
Chris@434 88 ++Fout;
Chris@434 89 }while(--k);
Chris@434 90 }
Chris@434 91
Chris@434 92 static void kf_bfly3(
Chris@501 93 vamp_kiss_fft_cpx * Fout,
Chris@434 94 const size_t fstride,
Chris@501 95 const vamp_kiss_fft_cfg st,
Chris@434 96 size_t m
Chris@434 97 )
Chris@434 98 {
Chris@434 99 size_t k=m;
Chris@434 100 const size_t m2 = 2*m;
Chris@501 101 vamp_kiss_fft_cpx *tw1,*tw2;
Chris@501 102 vamp_kiss_fft_cpx scratch[5];
Chris@501 103 vamp_kiss_fft_cpx epi3;
Chris@434 104 epi3 = st->twiddles[fstride*m];
Chris@434 105
Chris@434 106 tw1=tw2=st->twiddles;
Chris@434 107
Chris@434 108 do{
Chris@434 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
Chris@434 110
Chris@434 111 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@434 112 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@434 113
Chris@434 114 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@434 115 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@434 116 tw1 += fstride;
Chris@434 117 tw2 += fstride*2;
Chris@434 118
Chris@434 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Chris@434 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Chris@434 121
Chris@434 122 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@434 123
Chris@434 124 C_ADDTO(*Fout,scratch[3]);
Chris@434 125
Chris@434 126 Fout[m2].r = Fout[m].r + scratch[0].i;
Chris@434 127 Fout[m2].i = Fout[m].i - scratch[0].r;
Chris@434 128
Chris@434 129 Fout[m].r -= scratch[0].i;
Chris@434 130 Fout[m].i += scratch[0].r;
Chris@434 131
Chris@434 132 ++Fout;
Chris@434 133 }while(--k);
Chris@434 134 }
Chris@434 135
Chris@434 136 static void kf_bfly5(
Chris@501 137 vamp_kiss_fft_cpx * Fout,
Chris@434 138 const size_t fstride,
Chris@501 139 const vamp_kiss_fft_cfg st,
Chris@434 140 int m
Chris@434 141 )
Chris@434 142 {
Chris@501 143 vamp_kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@434 144 int u;
Chris@501 145 vamp_kiss_fft_cpx scratch[13];
Chris@501 146 vamp_kiss_fft_cpx * twiddles = st->twiddles;
Chris@501 147 vamp_kiss_fft_cpx *tw;
Chris@501 148 vamp_kiss_fft_cpx ya,yb;
Chris@434 149 ya = twiddles[fstride*m];
Chris@434 150 yb = twiddles[fstride*2*m];
Chris@434 151
Chris@434 152 Fout0=Fout;
Chris@434 153 Fout1=Fout0+m;
Chris@434 154 Fout2=Fout0+2*m;
Chris@434 155 Fout3=Fout0+3*m;
Chris@434 156 Fout4=Fout0+4*m;
Chris@434 157
Chris@434 158 tw=st->twiddles;
Chris@434 159 for ( u=0; u<m; ++u ) {
Chris@434 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
Chris@434 161 scratch[0] = *Fout0;
Chris@434 162
Chris@434 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@434 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@434 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@434 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@434 167
Chris@434 168 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@434 169 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@434 170 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@434 171 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@434 172
Chris@434 173 Fout0->r += scratch[7].r + scratch[8].r;
Chris@434 174 Fout0->i += scratch[7].i + scratch[8].i;
Chris@434 175
Chris@434 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
Chris@434 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
Chris@434 178
Chris@434 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
Chris@434 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
Chris@434 181
Chris@434 182 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@434 183 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@434 184
Chris@434 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
Chris@434 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
Chris@434 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
Chris@434 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
Chris@434 189
Chris@434 190 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@434 191 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@434 192
Chris@434 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@434 194 }
Chris@434 195 }
Chris@434 196
Chris@434 197 /* perform the butterfly for one stage of a mixed radix FFT */
Chris@434 198 static void kf_bfly_generic(
Chris@501 199 vamp_kiss_fft_cpx * Fout,
Chris@434 200 const size_t fstride,
Chris@501 201 const vamp_kiss_fft_cfg st,
Chris@434 202 int m,
Chris@434 203 int p
Chris@434 204 )
Chris@434 205 {
Chris@434 206 int u,k,q1,q;
Chris@501 207 vamp_kiss_fft_cpx * twiddles = st->twiddles;
Chris@501 208 vamp_kiss_fft_cpx t;
Chris@434 209 int Norig = st->nfft;
Chris@434 210
Chris@501 211 vamp_kiss_fft_cpx * scratch = (vamp_kiss_fft_cpx*)VAMP_KISS_FFT_TMP_ALLOC(sizeof(vamp_kiss_fft_cpx)*p);
Chris@434 212
Chris@434 213 for ( u=0; u<m; ++u ) {
Chris@434 214 k=u;
Chris@434 215 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@434 216 scratch[q1] = Fout[ k ];
Chris@434 217 C_FIXDIV(scratch[q1],p);
Chris@434 218 k += m;
Chris@434 219 }
Chris@434 220
Chris@434 221 k=u;
Chris@434 222 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@434 223 int twidx=0;
Chris@434 224 Fout[ k ] = scratch[0];
Chris@434 225 for (q=1;q<p;++q ) {
Chris@470 226 twidx += int(fstride * k);
Chris@434 227 if (twidx>=Norig) twidx-=Norig;
Chris@434 228 C_MUL(t,scratch[q] , twiddles[twidx] );
Chris@434 229 C_ADDTO( Fout[ k ] ,t);
Chris@434 230 }
Chris@434 231 k += m;
Chris@434 232 }
Chris@434 233 }
Chris@501 234 VAMP_KISS_FFT_TMP_FREE(scratch);
Chris@434 235 }
Chris@434 236
Chris@434 237 static
Chris@434 238 void kf_work(
Chris@501 239 vamp_kiss_fft_cpx * Fout,
Chris@501 240 const vamp_kiss_fft_cpx * f,
Chris@434 241 const size_t fstride,
Chris@434 242 int in_stride,
Chris@434 243 int * factors,
Chris@501 244 const vamp_kiss_fft_cfg st
Chris@434 245 )
Chris@434 246 {
Chris@501 247 vamp_kiss_fft_cpx * Fout_beg=Fout;
Chris@434 248 const int p=*factors++; /* the radix */
Chris@434 249 const int m=*factors++; /* stage's fft length/p */
Chris@501 250 const vamp_kiss_fft_cpx * Fout_end = Fout + p*m;
Chris@434 251
Chris@434 252 #ifdef _OPENMP
Chris@434 253 // use openmp extensions at the
Chris@434 254 // top-level (not recursive)
Chris@434 255 if (fstride==1 && p<=5)
Chris@434 256 {
Chris@434 257 int k;
Chris@434 258
Chris@434 259 // execute the p different work units in different threads
Chris@434 260 # pragma omp parallel for
Chris@434 261 for (k=0;k<p;++k)
Chris@434 262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
Chris@434 263 // all threads have joined by this point
Chris@434 264
Chris@434 265 switch (p) {
Chris@434 266 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@434 267 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@434 268 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@434 269 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@434 270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@434 271 }
Chris@434 272 return;
Chris@434 273 }
Chris@434 274 #endif
Chris@434 275
Chris@434 276 if (m==1) {
Chris@434 277 do{
Chris@451 278 Fout->r = f->r;
Chris@451 279 Fout->i = f->i;
Chris@434 280 f += fstride*in_stride;
Chris@434 281 }while(++Fout != Fout_end );
Chris@434 282 }else{
Chris@434 283 do{
Chris@434 284 // recursive call:
Chris@434 285 // DFT of size m*p performed by doing
Chris@434 286 // p instances of smaller DFTs of size m,
Chris@434 287 // each one takes a decimated version of the input
Chris@434 288 kf_work( Fout , f, fstride*p, in_stride, factors,st);
Chris@434 289 f += fstride*in_stride;
Chris@434 290 }while( (Fout += m) != Fout_end );
Chris@434 291 }
Chris@434 292
Chris@434 293 Fout=Fout_beg;
Chris@434 294
Chris@434 295 // recombine the p smaller DFTs
Chris@434 296 switch (p) {
Chris@434 297 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@434 298 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@434 299 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@434 300 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@434 301 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@434 302 }
Chris@434 303 }
Chris@434 304
Chris@434 305 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@434 306 where
Chris@434 307 p[i] * m[i] = m[i-1]
Chris@434 308 m0 = n */
Chris@434 309 static
Chris@434 310 void kf_factor(int n,int * facbuf)
Chris@434 311 {
Chris@434 312 int p=4;
Chris@434 313 double floor_sqrt;
Chris@434 314 floor_sqrt = floor( sqrt((double)n) );
Chris@434 315
Chris@434 316 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@434 317 do {
Chris@434 318 while (n % p) {
Chris@434 319 switch (p) {
Chris@434 320 case 4: p = 2; break;
Chris@434 321 case 2: p = 3; break;
Chris@434 322 default: p += 2; break;
Chris@434 323 }
Chris@434 324 if (p > floor_sqrt)
Chris@434 325 p = n; /* no more factors, skip to end */
Chris@434 326 }
Chris@434 327 n /= p;
Chris@434 328 *facbuf++ = p;
Chris@434 329 *facbuf++ = n;
Chris@434 330 } while (n > 1);
Chris@434 331 }
Chris@434 332
Chris@434 333 /*
Chris@434 334 *
Chris@434 335 * User-callable function to allocate all necessary storage space for the fft.
Chris@434 336 *
Chris@434 337 * The return value is a contiguous block of memory, allocated with malloc. As such,
Chris@434 338 * It can be freed with free(), rather than a kiss_fft-specific function.
Chris@434 339 * */
Chris@501 340 vamp_kiss_fft_cfg vamp_kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
Chris@434 341 {
Chris@501 342 vamp_kiss_fft_cfg st=NULL;
Chris@501 343 size_t memneeded = sizeof(struct vamp_kiss_fft_state)
Chris@501 344 + sizeof(vamp_kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
Chris@434 345
Chris@434 346 if ( lenmem==NULL ) {
Chris@501 347 st = ( vamp_kiss_fft_cfg)VAMP_KISS_FFT_MALLOC( memneeded );
Chris@434 348 }else{
Chris@434 349 if (mem != NULL && *lenmem >= memneeded)
Chris@501 350 st = (vamp_kiss_fft_cfg)mem;
Chris@434 351 *lenmem = memneeded;
Chris@434 352 }
Chris@434 353 if (st) {
Chris@434 354 int i;
Chris@434 355 st->nfft=nfft;
Chris@434 356 st->inverse = inverse_fft;
Chris@434 357
Chris@434 358 for (i=0;i<nfft;++i) {
Chris@434 359 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
Chris@434 360 double phase = -2*pi*i / nfft;
Chris@434 361 if (st->inverse)
Chris@434 362 phase *= -1;
Chris@434 363 kf_cexp(st->twiddles+i, phase );
Chris@434 364 }
Chris@434 365
Chris@434 366 kf_factor(nfft,st->factors);
Chris@434 367 }
Chris@434 368 return st;
Chris@434 369 }
Chris@434 370
Chris@501 371 void vamp_kiss_fft_free(void *mem)
Chris@501 372 {
Chris@501 373 free(mem);
Chris@501 374 }
Chris@434 375
Chris@501 376 void vamp_kiss_fft_stride(vamp_kiss_fft_cfg st,const vamp_kiss_fft_cpx *fin,vamp_kiss_fft_cpx *fout,int in_stride)
Chris@434 377 {
Chris@434 378 if (fin == fout) {
Chris@434 379 //NOTE: this is not really an in-place FFT algorithm.
Chris@434 380 //It just performs an out-of-place FFT into a temp buffer
Chris@501 381 vamp_kiss_fft_cpx * tmpbuf = (vamp_kiss_fft_cpx*)VAMP_KISS_FFT_TMP_ALLOC( sizeof(vamp_kiss_fft_cpx)*st->nfft);
Chris@434 382 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
Chris@501 383 memcpy(fout,tmpbuf,sizeof(vamp_kiss_fft_cpx)*st->nfft);
Chris@501 384 VAMP_KISS_FFT_TMP_FREE(tmpbuf);
Chris@434 385 }else{
Chris@434 386 kf_work( fout, fin, 1,in_stride, st->factors,st );
Chris@434 387 }
Chris@434 388 }
Chris@434 389
Chris@501 390 void vamp_kiss_fft(vamp_kiss_fft_cfg cfg,const vamp_kiss_fft_cpx *fin,vamp_kiss_fft_cpx *fout)
Chris@434 391 {
Chris@501 392 vamp_kiss_fft_stride(cfg,fin,fout,1);
Chris@434 393 }
Chris@434 394
Chris@434 395
Chris@501 396 void vamp_kiss_fft_cleanup(void)
Chris@434 397 {
Chris@434 398 // nothing needed any more
Chris@434 399 }
Chris@434 400
Chris@501 401 int vamp_kiss_fft_next_fast_size(int n)
Chris@434 402 {
Chris@434 403 while(1) {
Chris@434 404 int m=n;
Chris@434 405 while ( (m%2) == 0 ) m/=2;
Chris@434 406 while ( (m%3) == 0 ) m/=3;
Chris@434 407 while ( (m%5) == 0 ) m/=5;
Chris@434 408 if (m<=1)
Chris@434 409 break; /* n is completely factorable by twos, threes, and fives */
Chris@434 410 n++;
Chris@434 411 }
Chris@434 412 return n;
Chris@434 413 }