annotate src/opus-1.3/celt/kiss_fft.c @ 75:8f7fd4420df7

Add some cross-platform Boost headers
author Chris Cannam
date Sat, 16 Feb 2019 16:31:25 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /*Copyright (c) 2003-2004, Mark Borgerding
Chris@69 2 Lots of modifications by Jean-Marc Valin
Chris@69 3 Copyright (c) 2005-2007, Xiph.Org Foundation
Chris@69 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
Chris@69 5
Chris@69 6 All rights reserved.
Chris@69 7
Chris@69 8 Redistribution and use in source and binary forms, with or without
Chris@69 9 modification, are permitted provided that the following conditions are met:
Chris@69 10
Chris@69 11 * Redistributions of source code must retain the above copyright notice,
Chris@69 12 this list of conditions and the following disclaimer.
Chris@69 13 * Redistributions in binary form must reproduce the above copyright notice,
Chris@69 14 this list of conditions and the following disclaimer in the
Chris@69 15 documentation and/or other materials provided with the distribution.
Chris@69 16
Chris@69 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Chris@69 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Chris@69 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Chris@69 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Chris@69 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Chris@69 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Chris@69 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Chris@69 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Chris@69 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Chris@69 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Chris@69 27 POSSIBILITY OF SUCH DAMAGE.*/
Chris@69 28
Chris@69 29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
Chris@69 30 heavily modified to better suit Opus */
Chris@69 31
Chris@69 32 #ifndef SKIP_CONFIG_H
Chris@69 33 # ifdef HAVE_CONFIG_H
Chris@69 34 # include "config.h"
Chris@69 35 # endif
Chris@69 36 #endif
Chris@69 37
Chris@69 38 #include "_kiss_fft_guts.h"
Chris@69 39 #include "arch.h"
Chris@69 40 #include "os_support.h"
Chris@69 41 #include "mathops.h"
Chris@69 42 #include "stack_alloc.h"
Chris@69 43
Chris@69 44 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@69 45 complex numbers. It also delares the kf_ internal functions.
Chris@69 46 */
Chris@69 47
Chris@69 48 static void kf_bfly2(
Chris@69 49 kiss_fft_cpx * Fout,
Chris@69 50 int m,
Chris@69 51 int N
Chris@69 52 )
Chris@69 53 {
Chris@69 54 kiss_fft_cpx * Fout2;
Chris@69 55 int i;
Chris@69 56 (void)m;
Chris@69 57 #ifdef CUSTOM_MODES
Chris@69 58 if (m==1)
Chris@69 59 {
Chris@69 60 celt_assert(m==1);
Chris@69 61 for (i=0;i<N;i++)
Chris@69 62 {
Chris@69 63 kiss_fft_cpx t;
Chris@69 64 Fout2 = Fout + 1;
Chris@69 65 t = *Fout2;
Chris@69 66 C_SUB( *Fout2 , *Fout , t );
Chris@69 67 C_ADDTO( *Fout , t );
Chris@69 68 Fout += 2;
Chris@69 69 }
Chris@69 70 } else
Chris@69 71 #endif
Chris@69 72 {
Chris@69 73 opus_val16 tw;
Chris@69 74 tw = QCONST16(0.7071067812f, 15);
Chris@69 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */
Chris@69 76 celt_assert(m==4);
Chris@69 77 for (i=0;i<N;i++)
Chris@69 78 {
Chris@69 79 kiss_fft_cpx t;
Chris@69 80 Fout2 = Fout + 4;
Chris@69 81 t = Fout2[0];
Chris@69 82 C_SUB( Fout2[0] , Fout[0] , t );
Chris@69 83 C_ADDTO( Fout[0] , t );
Chris@69 84
Chris@69 85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
Chris@69 86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
Chris@69 87 C_SUB( Fout2[1] , Fout[1] , t );
Chris@69 88 C_ADDTO( Fout[1] , t );
Chris@69 89
Chris@69 90 t.r = Fout2[2].i;
Chris@69 91 t.i = -Fout2[2].r;
Chris@69 92 C_SUB( Fout2[2] , Fout[2] , t );
Chris@69 93 C_ADDTO( Fout[2] , t );
Chris@69 94
Chris@69 95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
Chris@69 96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
Chris@69 97 C_SUB( Fout2[3] , Fout[3] , t );
Chris@69 98 C_ADDTO( Fout[3] , t );
Chris@69 99 Fout += 8;
Chris@69 100 }
Chris@69 101 }
Chris@69 102 }
Chris@69 103
Chris@69 104 static void kf_bfly4(
Chris@69 105 kiss_fft_cpx * Fout,
Chris@69 106 const size_t fstride,
Chris@69 107 const kiss_fft_state *st,
Chris@69 108 int m,
Chris@69 109 int N,
Chris@69 110 int mm
Chris@69 111 )
Chris@69 112 {
Chris@69 113 int i;
Chris@69 114
Chris@69 115 if (m==1)
Chris@69 116 {
Chris@69 117 /* Degenerate case where all the twiddles are 1. */
Chris@69 118 for (i=0;i<N;i++)
Chris@69 119 {
Chris@69 120 kiss_fft_cpx scratch0, scratch1;
Chris@69 121
Chris@69 122 C_SUB( scratch0 , *Fout, Fout[2] );
Chris@69 123 C_ADDTO(*Fout, Fout[2]);
Chris@69 124 C_ADD( scratch1 , Fout[1] , Fout[3] );
Chris@69 125 C_SUB( Fout[2], *Fout, scratch1 );
Chris@69 126 C_ADDTO( *Fout , scratch1 );
Chris@69 127 C_SUB( scratch1 , Fout[1] , Fout[3] );
Chris@69 128
Chris@69 129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
Chris@69 130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
Chris@69 131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
Chris@69 132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
Chris@69 133 Fout+=4;
Chris@69 134 }
Chris@69 135 } else {
Chris@69 136 int j;
Chris@69 137 kiss_fft_cpx scratch[6];
Chris@69 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
Chris@69 139 const int m2=2*m;
Chris@69 140 const int m3=3*m;
Chris@69 141 kiss_fft_cpx * Fout_beg = Fout;
Chris@69 142 for (i=0;i<N;i++)
Chris@69 143 {
Chris@69 144 Fout = Fout_beg + i*mm;
Chris@69 145 tw3 = tw2 = tw1 = st->twiddles;
Chris@69 146 /* m is guaranteed to be a multiple of 4. */
Chris@69 147 for (j=0;j<m;j++)
Chris@69 148 {
Chris@69 149 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@69 150 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@69 151 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@69 152
Chris@69 153 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@69 154 C_ADDTO(*Fout, scratch[1]);
Chris@69 155 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@69 156 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@69 157 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@69 158 tw1 += fstride;
Chris@69 159 tw2 += fstride*2;
Chris@69 160 tw3 += fstride*3;
Chris@69 161 C_ADDTO( *Fout , scratch[3] );
Chris@69 162
Chris@69 163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
Chris@69 164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
Chris@69 165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
Chris@69 166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
Chris@69 167 ++Fout;
Chris@69 168 }
Chris@69 169 }
Chris@69 170 }
Chris@69 171 }
Chris@69 172
Chris@69 173
Chris@69 174 #ifndef RADIX_TWO_ONLY
Chris@69 175
Chris@69 176 static void kf_bfly3(
Chris@69 177 kiss_fft_cpx * Fout,
Chris@69 178 const size_t fstride,
Chris@69 179 const kiss_fft_state *st,
Chris@69 180 int m,
Chris@69 181 int N,
Chris@69 182 int mm
Chris@69 183 )
Chris@69 184 {
Chris@69 185 int i;
Chris@69 186 size_t k;
Chris@69 187 const size_t m2 = 2*m;
Chris@69 188 const kiss_twiddle_cpx *tw1,*tw2;
Chris@69 189 kiss_fft_cpx scratch[5];
Chris@69 190 kiss_twiddle_cpx epi3;
Chris@69 191
Chris@69 192 kiss_fft_cpx * Fout_beg = Fout;
Chris@69 193 #ifdef FIXED_POINT
Chris@69 194 /*epi3.r = -16384;*/ /* Unused */
Chris@69 195 epi3.i = -28378;
Chris@69 196 #else
Chris@69 197 epi3 = st->twiddles[fstride*m];
Chris@69 198 #endif
Chris@69 199 for (i=0;i<N;i++)
Chris@69 200 {
Chris@69 201 Fout = Fout_beg + i*mm;
Chris@69 202 tw1=tw2=st->twiddles;
Chris@69 203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
Chris@69 204 k=m;
Chris@69 205 do {
Chris@69 206
Chris@69 207 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@69 208 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@69 209
Chris@69 210 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@69 211 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@69 212 tw1 += fstride;
Chris@69 213 tw2 += fstride*2;
Chris@69 214
Chris@69 215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
Chris@69 216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
Chris@69 217
Chris@69 218 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@69 219
Chris@69 220 C_ADDTO(*Fout,scratch[3]);
Chris@69 221
Chris@69 222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
Chris@69 223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
Chris@69 224
Chris@69 225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
Chris@69 226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
Chris@69 227
Chris@69 228 ++Fout;
Chris@69 229 } while(--k);
Chris@69 230 }
Chris@69 231 }
Chris@69 232
Chris@69 233
Chris@69 234 #ifndef OVERRIDE_kf_bfly5
Chris@69 235 static void kf_bfly5(
Chris@69 236 kiss_fft_cpx * Fout,
Chris@69 237 const size_t fstride,
Chris@69 238 const kiss_fft_state *st,
Chris@69 239 int m,
Chris@69 240 int N,
Chris@69 241 int mm
Chris@69 242 )
Chris@69 243 {
Chris@69 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@69 245 int i, u;
Chris@69 246 kiss_fft_cpx scratch[13];
Chris@69 247 const kiss_twiddle_cpx *tw;
Chris@69 248 kiss_twiddle_cpx ya,yb;
Chris@69 249 kiss_fft_cpx * Fout_beg = Fout;
Chris@69 250
Chris@69 251 #ifdef FIXED_POINT
Chris@69 252 ya.r = 10126;
Chris@69 253 ya.i = -31164;
Chris@69 254 yb.r = -26510;
Chris@69 255 yb.i = -19261;
Chris@69 256 #else
Chris@69 257 ya = st->twiddles[fstride*m];
Chris@69 258 yb = st->twiddles[fstride*2*m];
Chris@69 259 #endif
Chris@69 260 tw=st->twiddles;
Chris@69 261
Chris@69 262 for (i=0;i<N;i++)
Chris@69 263 {
Chris@69 264 Fout = Fout_beg + i*mm;
Chris@69 265 Fout0=Fout;
Chris@69 266 Fout1=Fout0+m;
Chris@69 267 Fout2=Fout0+2*m;
Chris@69 268 Fout3=Fout0+3*m;
Chris@69 269 Fout4=Fout0+4*m;
Chris@69 270
Chris@69 271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
Chris@69 272 for ( u=0; u<m; ++u ) {
Chris@69 273 scratch[0] = *Fout0;
Chris@69 274
Chris@69 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@69 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@69 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@69 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@69 279
Chris@69 280 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@69 281 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@69 282 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@69 283 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@69 284
Chris@69 285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
Chris@69 286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
Chris@69 287
Chris@69 288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
Chris@69 289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
Chris@69 290
Chris@69 291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
Chris@69 292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
Chris@69 293
Chris@69 294 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@69 295 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@69 296
Chris@69 297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
Chris@69 298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
Chris@69 299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
Chris@69 300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
Chris@69 301
Chris@69 302 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@69 303 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@69 304
Chris@69 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@69 306 }
Chris@69 307 }
Chris@69 308 }
Chris@69 309 #endif /* OVERRIDE_kf_bfly5 */
Chris@69 310
Chris@69 311
Chris@69 312 #endif
Chris@69 313
Chris@69 314
Chris@69 315 #ifdef CUSTOM_MODES
Chris@69 316
Chris@69 317 static
Chris@69 318 void compute_bitrev_table(
Chris@69 319 int Fout,
Chris@69 320 opus_int16 *f,
Chris@69 321 const size_t fstride,
Chris@69 322 int in_stride,
Chris@69 323 opus_int16 * factors,
Chris@69 324 const kiss_fft_state *st
Chris@69 325 )
Chris@69 326 {
Chris@69 327 const int p=*factors++; /* the radix */
Chris@69 328 const int m=*factors++; /* stage's fft length/p */
Chris@69 329
Chris@69 330 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
Chris@69 331 if (m==1)
Chris@69 332 {
Chris@69 333 int j;
Chris@69 334 for (j=0;j<p;j++)
Chris@69 335 {
Chris@69 336 *f = Fout+j;
Chris@69 337 f += fstride*in_stride;
Chris@69 338 }
Chris@69 339 } else {
Chris@69 340 int j;
Chris@69 341 for (j=0;j<p;j++)
Chris@69 342 {
Chris@69 343 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
Chris@69 344 f += fstride*in_stride;
Chris@69 345 Fout += m;
Chris@69 346 }
Chris@69 347 }
Chris@69 348 }
Chris@69 349
Chris@69 350 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@69 351 where
Chris@69 352 p[i] * m[i] = m[i-1]
Chris@69 353 m0 = n */
Chris@69 354 static
Chris@69 355 int kf_factor(int n,opus_int16 * facbuf)
Chris@69 356 {
Chris@69 357 int p=4;
Chris@69 358 int i;
Chris@69 359 int stages=0;
Chris@69 360 int nbak = n;
Chris@69 361
Chris@69 362 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@69 363 do {
Chris@69 364 while (n % p) {
Chris@69 365 switch (p) {
Chris@69 366 case 4: p = 2; break;
Chris@69 367 case 2: p = 3; break;
Chris@69 368 default: p += 2; break;
Chris@69 369 }
Chris@69 370 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
Chris@69 371 p = n; /* no more factors, skip to end */
Chris@69 372 }
Chris@69 373 n /= p;
Chris@69 374 #ifdef RADIX_TWO_ONLY
Chris@69 375 if (p!=2 && p != 4)
Chris@69 376 #else
Chris@69 377 if (p>5)
Chris@69 378 #endif
Chris@69 379 {
Chris@69 380 return 0;
Chris@69 381 }
Chris@69 382 facbuf[2*stages] = p;
Chris@69 383 if (p==2 && stages > 1)
Chris@69 384 {
Chris@69 385 facbuf[2*stages] = 4;
Chris@69 386 facbuf[2] = 2;
Chris@69 387 }
Chris@69 388 stages++;
Chris@69 389 } while (n > 1);
Chris@69 390 n = nbak;
Chris@69 391 /* Reverse the order to get the radix 4 at the end, so we can use the
Chris@69 392 fast degenerate case. It turns out that reversing the order also
Chris@69 393 improves the noise behaviour. */
Chris@69 394 for (i=0;i<stages/2;i++)
Chris@69 395 {
Chris@69 396 int tmp;
Chris@69 397 tmp = facbuf[2*i];
Chris@69 398 facbuf[2*i] = facbuf[2*(stages-i-1)];
Chris@69 399 facbuf[2*(stages-i-1)] = tmp;
Chris@69 400 }
Chris@69 401 for (i=0;i<stages;i++)
Chris@69 402 {
Chris@69 403 n /= facbuf[2*i];
Chris@69 404 facbuf[2*i+1] = n;
Chris@69 405 }
Chris@69 406 return 1;
Chris@69 407 }
Chris@69 408
Chris@69 409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
Chris@69 410 {
Chris@69 411 int i;
Chris@69 412 #ifdef FIXED_POINT
Chris@69 413 for (i=0;i<nfft;++i) {
Chris@69 414 opus_val32 phase = -i;
Chris@69 415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
Chris@69 416 }
Chris@69 417 #else
Chris@69 418 for (i=0;i<nfft;++i) {
Chris@69 419 const double pi=3.14159265358979323846264338327;
Chris@69 420 double phase = ( -2*pi /nfft ) * i;
Chris@69 421 kf_cexp(twiddles+i, phase );
Chris@69 422 }
Chris@69 423 #endif
Chris@69 424 }
Chris@69 425
Chris@69 426 int opus_fft_alloc_arch_c(kiss_fft_state *st) {
Chris@69 427 (void)st;
Chris@69 428 return 0;
Chris@69 429 }
Chris@69 430
Chris@69 431 /*
Chris@69 432 *
Chris@69 433 * Allocates all necessary storage space for the fft and ifft.
Chris@69 434 * The return value is a contiguous block of memory. As such,
Chris@69 435 * It can be freed with free().
Chris@69 436 * */
Chris@69 437 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
Chris@69 438 const kiss_fft_state *base, int arch)
Chris@69 439 {
Chris@69 440 kiss_fft_state *st=NULL;
Chris@69 441 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
Chris@69 442
Chris@69 443 if ( lenmem==NULL ) {
Chris@69 444 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
Chris@69 445 }else{
Chris@69 446 if (mem != NULL && *lenmem >= memneeded)
Chris@69 447 st = (kiss_fft_state*)mem;
Chris@69 448 *lenmem = memneeded;
Chris@69 449 }
Chris@69 450 if (st) {
Chris@69 451 opus_int16 *bitrev;
Chris@69 452 kiss_twiddle_cpx *twiddles;
Chris@69 453
Chris@69 454 st->nfft=nfft;
Chris@69 455 #ifdef FIXED_POINT
Chris@69 456 st->scale_shift = celt_ilog2(st->nfft);
Chris@69 457 if (st->nfft == 1<<st->scale_shift)
Chris@69 458 st->scale = Q15ONE;
Chris@69 459 else
Chris@69 460 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
Chris@69 461 #else
Chris@69 462 st->scale = 1.f/nfft;
Chris@69 463 #endif
Chris@69 464 if (base != NULL)
Chris@69 465 {
Chris@69 466 st->twiddles = base->twiddles;
Chris@69 467 st->shift = 0;
Chris@69 468 while (st->shift < 32 && nfft<<st->shift != base->nfft)
Chris@69 469 st->shift++;
Chris@69 470 if (st->shift>=32)
Chris@69 471 goto fail;
Chris@69 472 } else {
Chris@69 473 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
Chris@69 474 compute_twiddles(twiddles, nfft);
Chris@69 475 st->shift = -1;
Chris@69 476 }
Chris@69 477 if (!kf_factor(nfft,st->factors))
Chris@69 478 {
Chris@69 479 goto fail;
Chris@69 480 }
Chris@69 481
Chris@69 482 /* bitrev */
Chris@69 483 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
Chris@69 484 if (st->bitrev==NULL)
Chris@69 485 goto fail;
Chris@69 486 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
Chris@69 487
Chris@69 488 /* Initialize architecture specific fft parameters */
Chris@69 489 if (opus_fft_alloc_arch(st, arch))
Chris@69 490 goto fail;
Chris@69 491 }
Chris@69 492 return st;
Chris@69 493 fail:
Chris@69 494 opus_fft_free(st, arch);
Chris@69 495 return NULL;
Chris@69 496 }
Chris@69 497
Chris@69 498 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
Chris@69 499 {
Chris@69 500 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
Chris@69 501 }
Chris@69 502
Chris@69 503 void opus_fft_free_arch_c(kiss_fft_state *st) {
Chris@69 504 (void)st;
Chris@69 505 }
Chris@69 506
Chris@69 507 void opus_fft_free(const kiss_fft_state *cfg, int arch)
Chris@69 508 {
Chris@69 509 if (cfg)
Chris@69 510 {
Chris@69 511 opus_fft_free_arch((kiss_fft_state *)cfg, arch);
Chris@69 512 opus_free((opus_int16*)cfg->bitrev);
Chris@69 513 if (cfg->shift < 0)
Chris@69 514 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
Chris@69 515 opus_free((kiss_fft_state*)cfg);
Chris@69 516 }
Chris@69 517 }
Chris@69 518
Chris@69 519 #endif /* CUSTOM_MODES */
Chris@69 520
Chris@69 521 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
Chris@69 522 {
Chris@69 523 int m2, m;
Chris@69 524 int p;
Chris@69 525 int L;
Chris@69 526 int fstride[MAXFACTORS];
Chris@69 527 int i;
Chris@69 528 int shift;
Chris@69 529
Chris@69 530 /* st->shift can be -1 */
Chris@69 531 shift = st->shift>0 ? st->shift : 0;
Chris@69 532
Chris@69 533 fstride[0] = 1;
Chris@69 534 L=0;
Chris@69 535 do {
Chris@69 536 p = st->factors[2*L];
Chris@69 537 m = st->factors[2*L+1];
Chris@69 538 fstride[L+1] = fstride[L]*p;
Chris@69 539 L++;
Chris@69 540 } while(m!=1);
Chris@69 541 m = st->factors[2*L-1];
Chris@69 542 for (i=L-1;i>=0;i--)
Chris@69 543 {
Chris@69 544 if (i!=0)
Chris@69 545 m2 = st->factors[2*i-1];
Chris@69 546 else
Chris@69 547 m2 = 1;
Chris@69 548 switch (st->factors[2*i])
Chris@69 549 {
Chris@69 550 case 2:
Chris@69 551 kf_bfly2(fout, m, fstride[i]);
Chris@69 552 break;
Chris@69 553 case 4:
Chris@69 554 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
Chris@69 555 break;
Chris@69 556 #ifndef RADIX_TWO_ONLY
Chris@69 557 case 3:
Chris@69 558 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
Chris@69 559 break;
Chris@69 560 case 5:
Chris@69 561 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
Chris@69 562 break;
Chris@69 563 #endif
Chris@69 564 }
Chris@69 565 m = m2;
Chris@69 566 }
Chris@69 567 }
Chris@69 568
Chris@69 569 void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@69 570 {
Chris@69 571 int i;
Chris@69 572 opus_val16 scale;
Chris@69 573 #ifdef FIXED_POINT
Chris@69 574 /* Allows us to scale with MULT16_32_Q16(), which is faster than
Chris@69 575 MULT16_32_Q15() on ARM. */
Chris@69 576 int scale_shift = st->scale_shift-1;
Chris@69 577 #endif
Chris@69 578 scale = st->scale;
Chris@69 579
Chris@69 580 celt_assert2 (fin != fout, "In-place FFT not supported");
Chris@69 581 /* Bit-reverse the input */
Chris@69 582 for (i=0;i<st->nfft;i++)
Chris@69 583 {
Chris@69 584 kiss_fft_cpx x = fin[i];
Chris@69 585 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
Chris@69 586 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
Chris@69 587 }
Chris@69 588 opus_fft_impl(st, fout);
Chris@69 589 }
Chris@69 590
Chris@69 591
Chris@69 592 void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@69 593 {
Chris@69 594 int i;
Chris@69 595 celt_assert2 (fin != fout, "In-place FFT not supported");
Chris@69 596 /* Bit-reverse the input */
Chris@69 597 for (i=0;i<st->nfft;i++)
Chris@69 598 fout[st->bitrev[i]] = fin[i];
Chris@69 599 for (i=0;i<st->nfft;i++)
Chris@69 600 fout[i].i = -fout[i].i;
Chris@69 601 opus_fft_impl(st, fout);
Chris@69 602 for (i=0;i<st->nfft;i++)
Chris@69 603 fout[i].i = -fout[i].i;
Chris@69 604 }