annotate src/vamp-sdk/ext/kiss_fft.c @ 463:43762dba6747 vampipe

Merge from 454:3949cc56f2ce
author Chris Cannam
date Mon, 10 Oct 2016 15:52:27 +0100
parents b4addbeab790
children af4a1522ef15
rev   line source
Chris@434 1 /*
Chris@434 2 Copyright (c) 2003-2010, Mark Borgerding
Chris@434 3
Chris@434 4 All rights reserved.
Chris@434 5
Chris@434 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Chris@434 7
Chris@434 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Chris@434 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Chris@434 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
Chris@434 11
Chris@434 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@434 13 */
Chris@434 14
Chris@434 15
Chris@434 16 #include "_kiss_fft_guts.h"
Chris@434 17 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@434 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
Chris@434 19 */
Chris@434 20
Chris@434 21 static void kf_bfly2(
Chris@434 22 kiss_fft_cpx * Fout,
Chris@434 23 const size_t fstride,
Chris@434 24 const kiss_fft_cfg st,
Chris@434 25 int m
Chris@434 26 )
Chris@434 27 {
Chris@434 28 kiss_fft_cpx * Fout2;
Chris@434 29 kiss_fft_cpx * tw1 = st->twiddles;
Chris@434 30 kiss_fft_cpx t;
Chris@434 31 Fout2 = Fout + m;
Chris@434 32 do{
Chris@434 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
Chris@434 34
Chris@434 35 C_MUL (t, *Fout2 , *tw1);
Chris@434 36 tw1 += fstride;
Chris@434 37 C_SUB( *Fout2 , *Fout , t );
Chris@434 38 C_ADDTO( *Fout , t );
Chris@434 39 ++Fout2;
Chris@434 40 ++Fout;
Chris@434 41 }while (--m);
Chris@434 42 }
Chris@434 43
Chris@434 44 static void kf_bfly4(
Chris@434 45 kiss_fft_cpx * Fout,
Chris@434 46 const size_t fstride,
Chris@434 47 const kiss_fft_cfg st,
Chris@434 48 const size_t m
Chris@434 49 )
Chris@434 50 {
Chris@434 51 kiss_fft_cpx *tw1,*tw2,*tw3;
Chris@434 52 kiss_fft_cpx scratch[6];
Chris@434 53 size_t k=m;
Chris@434 54 const size_t m2=2*m;
Chris@434 55 const size_t m3=3*m;
Chris@434 56
Chris@434 57
Chris@434 58 tw3 = tw2 = tw1 = st->twiddles;
Chris@434 59
Chris@434 60 do {
Chris@434 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
Chris@434 62
Chris@434 63 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@434 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@434 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@434 66
Chris@434 67 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@434 68 C_ADDTO(*Fout, scratch[1]);
Chris@434 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@434 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@434 71 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@434 72 tw1 += fstride;
Chris@434 73 tw2 += fstride*2;
Chris@434 74 tw3 += fstride*3;
Chris@434 75 C_ADDTO( *Fout , scratch[3] );
Chris@434 76
Chris@434 77 if(st->inverse) {
Chris@434 78 Fout[m].r = scratch[5].r - scratch[4].i;
Chris@434 79 Fout[m].i = scratch[5].i + scratch[4].r;
Chris@434 80 Fout[m3].r = scratch[5].r + scratch[4].i;
Chris@434 81 Fout[m3].i = scratch[5].i - scratch[4].r;
Chris@434 82 }else{
Chris@434 83 Fout[m].r = scratch[5].r + scratch[4].i;
Chris@434 84 Fout[m].i = scratch[5].i - scratch[4].r;
Chris@434 85 Fout[m3].r = scratch[5].r - scratch[4].i;
Chris@434 86 Fout[m3].i = scratch[5].i + scratch[4].r;
Chris@434 87 }
Chris@434 88 ++Fout;
Chris@434 89 }while(--k);
Chris@434 90 }
Chris@434 91
Chris@434 92 static void kf_bfly3(
Chris@434 93 kiss_fft_cpx * Fout,
Chris@434 94 const size_t fstride,
Chris@434 95 const kiss_fft_cfg st,
Chris@434 96 size_t m
Chris@434 97 )
Chris@434 98 {
Chris@434 99 size_t k=m;
Chris@434 100 const size_t m2 = 2*m;
Chris@434 101 kiss_fft_cpx *tw1,*tw2;
Chris@434 102 kiss_fft_cpx scratch[5];
Chris@434 103 kiss_fft_cpx epi3;
Chris@434 104 epi3 = st->twiddles[fstride*m];
Chris@434 105
Chris@434 106 tw1=tw2=st->twiddles;
Chris@434 107
Chris@434 108 do{
Chris@434 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
Chris@434 110
Chris@434 111 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@434 112 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@434 113
Chris@434 114 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@434 115 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@434 116 tw1 += fstride;
Chris@434 117 tw2 += fstride*2;
Chris@434 118
Chris@434 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Chris@434 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Chris@434 121
Chris@434 122 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@434 123
Chris@434 124 C_ADDTO(*Fout,scratch[3]);
Chris@434 125
Chris@434 126 Fout[m2].r = Fout[m].r + scratch[0].i;
Chris@434 127 Fout[m2].i = Fout[m].i - scratch[0].r;
Chris@434 128
Chris@434 129 Fout[m].r -= scratch[0].i;
Chris@434 130 Fout[m].i += scratch[0].r;
Chris@434 131
Chris@434 132 ++Fout;
Chris@434 133 }while(--k);
Chris@434 134 }
Chris@434 135
Chris@434 136 static void kf_bfly5(
Chris@434 137 kiss_fft_cpx * Fout,
Chris@434 138 const size_t fstride,
Chris@434 139 const kiss_fft_cfg st,
Chris@434 140 int m
Chris@434 141 )
Chris@434 142 {
Chris@434 143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@434 144 int u;
Chris@434 145 kiss_fft_cpx scratch[13];
Chris@434 146 kiss_fft_cpx * twiddles = st->twiddles;
Chris@434 147 kiss_fft_cpx *tw;
Chris@434 148 kiss_fft_cpx ya,yb;
Chris@434 149 ya = twiddles[fstride*m];
Chris@434 150 yb = twiddles[fstride*2*m];
Chris@434 151
Chris@434 152 Fout0=Fout;
Chris@434 153 Fout1=Fout0+m;
Chris@434 154 Fout2=Fout0+2*m;
Chris@434 155 Fout3=Fout0+3*m;
Chris@434 156 Fout4=Fout0+4*m;
Chris@434 157
Chris@434 158 tw=st->twiddles;
Chris@434 159 for ( u=0; u<m; ++u ) {
Chris@434 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
Chris@434 161 scratch[0] = *Fout0;
Chris@434 162
Chris@434 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@434 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@434 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@434 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@434 167
Chris@434 168 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@434 169 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@434 170 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@434 171 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@434 172
Chris@434 173 Fout0->r += scratch[7].r + scratch[8].r;
Chris@434 174 Fout0->i += scratch[7].i + scratch[8].i;
Chris@434 175
Chris@434 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
Chris@434 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
Chris@434 178
Chris@434 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
Chris@434 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
Chris@434 181
Chris@434 182 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@434 183 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@434 184
Chris@434 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
Chris@434 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
Chris@434 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
Chris@434 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
Chris@434 189
Chris@434 190 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@434 191 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@434 192
Chris@434 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@434 194 }
Chris@434 195 }
Chris@434 196
Chris@434 197 /* perform the butterfly for one stage of a mixed radix FFT */
Chris@434 198 static void kf_bfly_generic(
Chris@434 199 kiss_fft_cpx * Fout,
Chris@434 200 const size_t fstride,
Chris@434 201 const kiss_fft_cfg st,
Chris@434 202 int m,
Chris@434 203 int p
Chris@434 204 )
Chris@434 205 {
Chris@434 206 int u,k,q1,q;
Chris@434 207 kiss_fft_cpx * twiddles = st->twiddles;
Chris@434 208 kiss_fft_cpx t;
Chris@434 209 int Norig = st->nfft;
Chris@434 210
Chris@434 211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
Chris@434 212
Chris@434 213 for ( u=0; u<m; ++u ) {
Chris@434 214 k=u;
Chris@434 215 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@434 216 scratch[q1] = Fout[ k ];
Chris@434 217 C_FIXDIV(scratch[q1],p);
Chris@434 218 k += m;
Chris@434 219 }
Chris@434 220
Chris@434 221 k=u;
Chris@434 222 for ( q1=0 ; q1<p ; ++q1 ) {
Chris@434 223 int twidx=0;
Chris@434 224 Fout[ k ] = scratch[0];
Chris@434 225 for (q=1;q<p;++q ) {
Chris@434 226 twidx += fstride * k;
Chris@434 227 if (twidx>=Norig) twidx-=Norig;
Chris@434 228 C_MUL(t,scratch[q] , twiddles[twidx] );
Chris@434 229 C_ADDTO( Fout[ k ] ,t);
Chris@434 230 }
Chris@434 231 k += m;
Chris@434 232 }
Chris@434 233 }
Chris@434 234 KISS_FFT_TMP_FREE(scratch);
Chris@434 235 }
Chris@434 236
Chris@434 237 static
Chris@434 238 void kf_work(
Chris@434 239 kiss_fft_cpx * Fout,
Chris@434 240 const kiss_fft_cpx * f,
Chris@434 241 const size_t fstride,
Chris@434 242 int in_stride,
Chris@434 243 int * factors,
Chris@434 244 const kiss_fft_cfg st
Chris@434 245 )
Chris@434 246 {
Chris@434 247 kiss_fft_cpx * Fout_beg=Fout;
Chris@434 248 const int p=*factors++; /* the radix */
Chris@434 249 const int m=*factors++; /* stage's fft length/p */
Chris@434 250 const kiss_fft_cpx * Fout_end = Fout + p*m;
Chris@434 251
Chris@434 252 #ifdef _OPENMP
Chris@434 253 // use openmp extensions at the
Chris@434 254 // top-level (not recursive)
Chris@434 255 if (fstride==1 && p<=5)
Chris@434 256 {
Chris@434 257 int k;
Chris@434 258
Chris@434 259 // execute the p different work units in different threads
Chris@434 260 # pragma omp parallel for
Chris@434 261 for (k=0;k<p;++k)
Chris@434 262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
Chris@434 263 // all threads have joined by this point
Chris@434 264
Chris@434 265 switch (p) {
Chris@434 266 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@434 267 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@434 268 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@434 269 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@434 270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@434 271 }
Chris@434 272 return;
Chris@434 273 }
Chris@434 274 #endif
Chris@434 275
Chris@434 276 if (m==1) {
Chris@434 277 do{
Chris@451 278 Fout->r = f->r;
Chris@451 279 Fout->i = f->i;
Chris@434 280 f += fstride*in_stride;
Chris@434 281 }while(++Fout != Fout_end );
Chris@434 282 }else{
Chris@434 283 do{
Chris@434 284 // recursive call:
Chris@434 285 // DFT of size m*p performed by doing
Chris@434 286 // p instances of smaller DFTs of size m,
Chris@434 287 // each one takes a decimated version of the input
Chris@434 288 kf_work( Fout , f, fstride*p, in_stride, factors,st);
Chris@434 289 f += fstride*in_stride;
Chris@434 290 }while( (Fout += m) != Fout_end );
Chris@434 291 }
Chris@434 292
Chris@434 293 Fout=Fout_beg;
Chris@434 294
Chris@434 295 // recombine the p smaller DFTs
Chris@434 296 switch (p) {
Chris@434 297 case 2: kf_bfly2(Fout,fstride,st,m); break;
Chris@434 298 case 3: kf_bfly3(Fout,fstride,st,m); break;
Chris@434 299 case 4: kf_bfly4(Fout,fstride,st,m); break;
Chris@434 300 case 5: kf_bfly5(Fout,fstride,st,m); break;
Chris@434 301 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
Chris@434 302 }
Chris@434 303 }
Chris@434 304
Chris@434 305 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@434 306 where
Chris@434 307 p[i] * m[i] = m[i-1]
Chris@434 308 m0 = n */
Chris@434 309 static
Chris@434 310 void kf_factor(int n,int * facbuf)
Chris@434 311 {
Chris@434 312 int p=4;
Chris@434 313 double floor_sqrt;
Chris@434 314 floor_sqrt = floor( sqrt((double)n) );
Chris@434 315
Chris@434 316 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@434 317 do {
Chris@434 318 while (n % p) {
Chris@434 319 switch (p) {
Chris@434 320 case 4: p = 2; break;
Chris@434 321 case 2: p = 3; break;
Chris@434 322 default: p += 2; break;
Chris@434 323 }
Chris@434 324 if (p > floor_sqrt)
Chris@434 325 p = n; /* no more factors, skip to end */
Chris@434 326 }
Chris@434 327 n /= p;
Chris@434 328 *facbuf++ = p;
Chris@434 329 *facbuf++ = n;
Chris@434 330 } while (n > 1);
Chris@434 331 }
Chris@434 332
Chris@434 333 /*
Chris@434 334 *
Chris@434 335 * User-callable function to allocate all necessary storage space for the fft.
Chris@434 336 *
Chris@434 337 * The return value is a contiguous block of memory, allocated with malloc. As such,
Chris@434 338 * It can be freed with free(), rather than a kiss_fft-specific function.
Chris@434 339 * */
Chris@434 340 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
Chris@434 341 {
Chris@434 342 kiss_fft_cfg st=NULL;
Chris@434 343 size_t memneeded = sizeof(struct kiss_fft_state)
Chris@434 344 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
Chris@434 345
Chris@434 346 if ( lenmem==NULL ) {
Chris@434 347 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
Chris@434 348 }else{
Chris@434 349 if (mem != NULL && *lenmem >= memneeded)
Chris@434 350 st = (kiss_fft_cfg)mem;
Chris@434 351 *lenmem = memneeded;
Chris@434 352 }
Chris@434 353 if (st) {
Chris@434 354 int i;
Chris@434 355 st->nfft=nfft;
Chris@434 356 st->inverse = inverse_fft;
Chris@434 357
Chris@434 358 for (i=0;i<nfft;++i) {
Chris@434 359 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
Chris@434 360 double phase = -2*pi*i / nfft;
Chris@434 361 if (st->inverse)
Chris@434 362 phase *= -1;
Chris@434 363 kf_cexp(st->twiddles+i, phase );
Chris@434 364 }
Chris@434 365
Chris@434 366 kf_factor(nfft,st->factors);
Chris@434 367 }
Chris@434 368 return st;
Chris@434 369 }
Chris@434 370
Chris@434 371
Chris@434 372 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
Chris@434 373 {
Chris@434 374 if (fin == fout) {
Chris@434 375 //NOTE: this is not really an in-place FFT algorithm.
Chris@434 376 //It just performs an out-of-place FFT into a temp buffer
Chris@434 377 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
Chris@434 378 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
Chris@434 379 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
Chris@434 380 KISS_FFT_TMP_FREE(tmpbuf);
Chris@434 381 }else{
Chris@434 382 kf_work( fout, fin, 1,in_stride, st->factors,st );
Chris@434 383 }
Chris@434 384 }
Chris@434 385
Chris@434 386 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@434 387 {
Chris@434 388 kiss_fft_stride(cfg,fin,fout,1);
Chris@434 389 }
Chris@434 390
Chris@434 391
Chris@434 392 void kiss_fft_cleanup(void)
Chris@434 393 {
Chris@434 394 // nothing needed any more
Chris@434 395 }
Chris@434 396
Chris@434 397 int kiss_fft_next_fast_size(int n)
Chris@434 398 {
Chris@434 399 while(1) {
Chris@434 400 int m=n;
Chris@434 401 while ( (m%2) == 0 ) m/=2;
Chris@434 402 while ( (m%3) == 0 ) m/=3;
Chris@434 403 while ( (m%5) == 0 ) m/=5;
Chris@434 404 if (m<=1)
Chris@434 405 break; /* n is completely factorable by twos, threes, and fives */
Chris@434 406 n++;
Chris@434 407 }
Chris@434 408 return n;
Chris@434 409 }