annotate src/opus-1.3/celt/kiss_fft.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /*Copyright (c) 2003-2004, Mark Borgerding
Chris@69 2 Lots of modifications by Jean-Marc Valin
Chris@69 3 Copyright (c) 2005-2007, Xiph.Org Foundation
Chris@69 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
Chris@69 5
Chris@69 6 All rights reserved.
Chris@69 7
Chris@69 8 Redistribution and use in source and binary forms, with or without
Chris@69 9 modification, are permitted provided that the following conditions are met:
Chris@69 10
Chris@69 11 * Redistributions of source code must retain the above copyright notice,
Chris@69 12 this list of conditions and the following disclaimer.
Chris@69 13 * Redistributions in binary form must reproduce the above copyright notice,
Chris@69 14 this list of conditions and the following disclaimer in the
Chris@69 15 documentation and/or other materials provided with the distribution.
Chris@69 16
Chris@69 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Chris@69 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Chris@69 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Chris@69 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Chris@69 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Chris@69 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Chris@69 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Chris@69 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Chris@69 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Chris@69 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Chris@69 27 POSSIBILITY OF SUCH DAMAGE.*/
Chris@69 28
Chris@69 29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
Chris@69 30 heavily modified to better suit Opus */
Chris@69 31
Chris@69 32 #ifndef SKIP_CONFIG_H
Chris@69 33 # ifdef HAVE_CONFIG_H
Chris@69 34 # include "config.h"
Chris@69 35 # endif
Chris@69 36 #endif
Chris@69 37
Chris@69 38 #include "_kiss_fft_guts.h"
Chris@69 39 #include "arch.h"
Chris@69 40 #include "os_support.h"
Chris@69 41 #include "mathops.h"
Chris@69 42 #include "stack_alloc.h"
Chris@69 43
Chris@69 44 /* The guts header contains all the multiplication and addition macros that are defined for
Chris@69 45 complex numbers. It also delares the kf_ internal functions.
Chris@69 46 */
Chris@69 47
Chris@69 48 static void kf_bfly2(
Chris@69 49 kiss_fft_cpx * Fout,
Chris@69 50 int m,
Chris@69 51 int N
Chris@69 52 )
Chris@69 53 {
Chris@69 54 kiss_fft_cpx * Fout2;
Chris@69 55 int i;
Chris@69 56 (void)m;
Chris@69 57 #ifdef CUSTOM_MODES
Chris@69 58 if (m==1)
Chris@69 59 {
Chris@69 60 celt_assert(m==1);
Chris@69 61 for (i=0;i<N;i++)
Chris@69 62 {
Chris@69 63 kiss_fft_cpx t;
Chris@69 64 Fout2 = Fout + 1;
Chris@69 65 t = *Fout2;
Chris@69 66 C_SUB( *Fout2 , *Fout , t );
Chris@69 67 C_ADDTO( *Fout , t );
Chris@69 68 Fout += 2;
Chris@69 69 }
Chris@69 70 } else
Chris@69 71 #endif
Chris@69 72 {
Chris@69 73 opus_val16 tw;
Chris@69 74 tw = QCONST16(0.7071067812f, 15);
Chris@69 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */
Chris@69 76 celt_assert(m==4);
Chris@69 77 for (i=0;i<N;i++)
Chris@69 78 {
Chris@69 79 kiss_fft_cpx t;
Chris@69 80 Fout2 = Fout + 4;
Chris@69 81 t = Fout2[0];
Chris@69 82 C_SUB( Fout2[0] , Fout[0] , t );
Chris@69 83 C_ADDTO( Fout[0] , t );
Chris@69 84
Chris@69 85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
Chris@69 86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
Chris@69 87 C_SUB( Fout2[1] , Fout[1] , t );
Chris@69 88 C_ADDTO( Fout[1] , t );
Chris@69 89
Chris@69 90 t.r = Fout2[2].i;
Chris@69 91 t.i = -Fout2[2].r;
Chris@69 92 C_SUB( Fout2[2] , Fout[2] , t );
Chris@69 93 C_ADDTO( Fout[2] , t );
Chris@69 94
Chris@69 95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
Chris@69 96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
Chris@69 97 C_SUB( Fout2[3] , Fout[3] , t );
Chris@69 98 C_ADDTO( Fout[3] , t );
Chris@69 99 Fout += 8;
Chris@69 100 }
Chris@69 101 }
Chris@69 102 }
Chris@69 103
Chris@69 104 static void kf_bfly4(
Chris@69 105 kiss_fft_cpx * Fout,
Chris@69 106 const size_t fstride,
Chris@69 107 const kiss_fft_state *st,
Chris@69 108 int m,
Chris@69 109 int N,
Chris@69 110 int mm
Chris@69 111 )
Chris@69 112 {
Chris@69 113 int i;
Chris@69 114
Chris@69 115 if (m==1)
Chris@69 116 {
Chris@69 117 /* Degenerate case where all the twiddles are 1. */
Chris@69 118 for (i=0;i<N;i++)
Chris@69 119 {
Chris@69 120 kiss_fft_cpx scratch0, scratch1;
Chris@69 121
Chris@69 122 C_SUB( scratch0 , *Fout, Fout[2] );
Chris@69 123 C_ADDTO(*Fout, Fout[2]);
Chris@69 124 C_ADD( scratch1 , Fout[1] , Fout[3] );
Chris@69 125 C_SUB( Fout[2], *Fout, scratch1 );
Chris@69 126 C_ADDTO( *Fout , scratch1 );
Chris@69 127 C_SUB( scratch1 , Fout[1] , Fout[3] );
Chris@69 128
Chris@69 129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
Chris@69 130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
Chris@69 131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
Chris@69 132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
Chris@69 133 Fout+=4;
Chris@69 134 }
Chris@69 135 } else {
Chris@69 136 int j;
Chris@69 137 kiss_fft_cpx scratch[6];
Chris@69 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
Chris@69 139 const int m2=2*m;
Chris@69 140 const int m3=3*m;
Chris@69 141 kiss_fft_cpx * Fout_beg = Fout;
Chris@69 142 for (i=0;i<N;i++)
Chris@69 143 {
Chris@69 144 Fout = Fout_beg + i*mm;
Chris@69 145 tw3 = tw2 = tw1 = st->twiddles;
Chris@69 146 /* m is guaranteed to be a multiple of 4. */
Chris@69 147 for (j=0;j<m;j++)
Chris@69 148 {
Chris@69 149 C_MUL(scratch[0],Fout[m] , *tw1 );
Chris@69 150 C_MUL(scratch[1],Fout[m2] , *tw2 );
Chris@69 151 C_MUL(scratch[2],Fout[m3] , *tw3 );
Chris@69 152
Chris@69 153 C_SUB( scratch[5] , *Fout, scratch[1] );
Chris@69 154 C_ADDTO(*Fout, scratch[1]);
Chris@69 155 C_ADD( scratch[3] , scratch[0] , scratch[2] );
Chris@69 156 C_SUB( scratch[4] , scratch[0] , scratch[2] );
Chris@69 157 C_SUB( Fout[m2], *Fout, scratch[3] );
Chris@69 158 tw1 += fstride;
Chris@69 159 tw2 += fstride*2;
Chris@69 160 tw3 += fstride*3;
Chris@69 161 C_ADDTO( *Fout , scratch[3] );
Chris@69 162
Chris@69 163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
Chris@69 164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
Chris@69 165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
Chris@69 166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
Chris@69 167 ++Fout;
Chris@69 168 }
Chris@69 169 }
Chris@69 170 }
Chris@69 171 }
Chris@69 172
Chris@69 173
Chris@69 174 #ifndef RADIX_TWO_ONLY
Chris@69 175
Chris@69 176 static void kf_bfly3(
Chris@69 177 kiss_fft_cpx * Fout,
Chris@69 178 const size_t fstride,
Chris@69 179 const kiss_fft_state *st,
Chris@69 180 int m,
Chris@69 181 int N,
Chris@69 182 int mm
Chris@69 183 )
Chris@69 184 {
Chris@69 185 int i;
Chris@69 186 size_t k;
Chris@69 187 const size_t m2 = 2*m;
Chris@69 188 const kiss_twiddle_cpx *tw1,*tw2;
Chris@69 189 kiss_fft_cpx scratch[5];
Chris@69 190 kiss_twiddle_cpx epi3;
Chris@69 191
Chris@69 192 kiss_fft_cpx * Fout_beg = Fout;
Chris@69 193 #ifdef FIXED_POINT
Chris@69 194 /*epi3.r = -16384;*/ /* Unused */
Chris@69 195 epi3.i = -28378;
Chris@69 196 #else
Chris@69 197 epi3 = st->twiddles[fstride*m];
Chris@69 198 #endif
Chris@69 199 for (i=0;i<N;i++)
Chris@69 200 {
Chris@69 201 Fout = Fout_beg + i*mm;
Chris@69 202 tw1=tw2=st->twiddles;
Chris@69 203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
Chris@69 204 k=m;
Chris@69 205 do {
Chris@69 206
Chris@69 207 C_MUL(scratch[1],Fout[m] , *tw1);
Chris@69 208 C_MUL(scratch[2],Fout[m2] , *tw2);
Chris@69 209
Chris@69 210 C_ADD(scratch[3],scratch[1],scratch[2]);
Chris@69 211 C_SUB(scratch[0],scratch[1],scratch[2]);
Chris@69 212 tw1 += fstride;
Chris@69 213 tw2 += fstride*2;
Chris@69 214
Chris@69 215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
Chris@69 216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
Chris@69 217
Chris@69 218 C_MULBYSCALAR( scratch[0] , epi3.i );
Chris@69 219
Chris@69 220 C_ADDTO(*Fout,scratch[3]);
Chris@69 221
Chris@69 222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
Chris@69 223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
Chris@69 224
Chris@69 225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
Chris@69 226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
Chris@69 227
Chris@69 228 ++Fout;
Chris@69 229 } while(--k);
Chris@69 230 }
Chris@69 231 }
Chris@69 232
Chris@69 233
Chris@69 234 #ifndef OVERRIDE_kf_bfly5
Chris@69 235 static void kf_bfly5(
Chris@69 236 kiss_fft_cpx * Fout,
Chris@69 237 const size_t fstride,
Chris@69 238 const kiss_fft_state *st,
Chris@69 239 int m,
Chris@69 240 int N,
Chris@69 241 int mm
Chris@69 242 )
Chris@69 243 {
Chris@69 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
Chris@69 245 int i, u;
Chris@69 246 kiss_fft_cpx scratch[13];
Chris@69 247 const kiss_twiddle_cpx *tw;
Chris@69 248 kiss_twiddle_cpx ya,yb;
Chris@69 249 kiss_fft_cpx * Fout_beg = Fout;
Chris@69 250
Chris@69 251 #ifdef FIXED_POINT
Chris@69 252 ya.r = 10126;
Chris@69 253 ya.i = -31164;
Chris@69 254 yb.r = -26510;
Chris@69 255 yb.i = -19261;
Chris@69 256 #else
Chris@69 257 ya = st->twiddles[fstride*m];
Chris@69 258 yb = st->twiddles[fstride*2*m];
Chris@69 259 #endif
Chris@69 260 tw=st->twiddles;
Chris@69 261
Chris@69 262 for (i=0;i<N;i++)
Chris@69 263 {
Chris@69 264 Fout = Fout_beg + i*mm;
Chris@69 265 Fout0=Fout;
Chris@69 266 Fout1=Fout0+m;
Chris@69 267 Fout2=Fout0+2*m;
Chris@69 268 Fout3=Fout0+3*m;
Chris@69 269 Fout4=Fout0+4*m;
Chris@69 270
Chris@69 271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
Chris@69 272 for ( u=0; u<m; ++u ) {
Chris@69 273 scratch[0] = *Fout0;
Chris@69 274
Chris@69 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
Chris@69 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
Chris@69 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
Chris@69 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
Chris@69 279
Chris@69 280 C_ADD( scratch[7],scratch[1],scratch[4]);
Chris@69 281 C_SUB( scratch[10],scratch[1],scratch[4]);
Chris@69 282 C_ADD( scratch[8],scratch[2],scratch[3]);
Chris@69 283 C_SUB( scratch[9],scratch[2],scratch[3]);
Chris@69 284
Chris@69 285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
Chris@69 286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
Chris@69 287
Chris@69 288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
Chris@69 289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
Chris@69 290
Chris@69 291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
Chris@69 292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
Chris@69 293
Chris@69 294 C_SUB(*Fout1,scratch[5],scratch[6]);
Chris@69 295 C_ADD(*Fout4,scratch[5],scratch[6]);
Chris@69 296
Chris@69 297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
Chris@69 298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
Chris@69 299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
Chris@69 300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
Chris@69 301
Chris@69 302 C_ADD(*Fout2,scratch[11],scratch[12]);
Chris@69 303 C_SUB(*Fout3,scratch[11],scratch[12]);
Chris@69 304
Chris@69 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
Chris@69 306 }
Chris@69 307 }
Chris@69 308 }
Chris@69 309 #endif /* OVERRIDE_kf_bfly5 */
Chris@69 310
Chris@69 311
Chris@69 312 #endif
Chris@69 313
Chris@69 314
Chris@69 315 #ifdef CUSTOM_MODES
Chris@69 316
Chris@69 317 static
Chris@69 318 void compute_bitrev_table(
Chris@69 319 int Fout,
Chris@69 320 opus_int16 *f,
Chris@69 321 const size_t fstride,
Chris@69 322 int in_stride,
Chris@69 323 opus_int16 * factors,
Chris@69 324 const kiss_fft_state *st
Chris@69 325 )
Chris@69 326 {
Chris@69 327 const int p=*factors++; /* the radix */
Chris@69 328 const int m=*factors++; /* stage's fft length/p */
Chris@69 329
Chris@69 330 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
Chris@69 331 if (m==1)
Chris@69 332 {
Chris@69 333 int j;
Chris@69 334 for (j=0;j<p;j++)
Chris@69 335 {
Chris@69 336 *f = Fout+j;
Chris@69 337 f += fstride*in_stride;
Chris@69 338 }
Chris@69 339 } else {
Chris@69 340 int j;
Chris@69 341 for (j=0;j<p;j++)
Chris@69 342 {
Chris@69 343 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
Chris@69 344 f += fstride*in_stride;
Chris@69 345 Fout += m;
Chris@69 346 }
Chris@69 347 }
Chris@69 348 }
Chris@69 349
Chris@69 350 /* facbuf is populated by p1,m1,p2,m2, ...
Chris@69 351 where
Chris@69 352 p[i] * m[i] = m[i-1]
Chris@69 353 m0 = n */
Chris@69 354 static
Chris@69 355 int kf_factor(int n,opus_int16 * facbuf)
Chris@69 356 {
Chris@69 357 int p=4;
Chris@69 358 int i;
Chris@69 359 int stages=0;
Chris@69 360 int nbak = n;
Chris@69 361
Chris@69 362 /*factor out powers of 4, powers of 2, then any remaining primes */
Chris@69 363 do {
Chris@69 364 while (n % p) {
Chris@69 365 switch (p) {
Chris@69 366 case 4: p = 2; break;
Chris@69 367 case 2: p = 3; break;
Chris@69 368 default: p += 2; break;
Chris@69 369 }
Chris@69 370 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
Chris@69 371 p = n; /* no more factors, skip to end */
Chris@69 372 }
Chris@69 373 n /= p;
Chris@69 374 #ifdef RADIX_TWO_ONLY
Chris@69 375 if (p!=2 && p != 4)
Chris@69 376 #else
Chris@69 377 if (p>5)
Chris@69 378 #endif
Chris@69 379 {
Chris@69 380 return 0;
Chris@69 381 }
Chris@69 382 facbuf[2*stages] = p;
Chris@69 383 if (p==2 && stages > 1)
Chris@69 384 {
Chris@69 385 facbuf[2*stages] = 4;
Chris@69 386 facbuf[2] = 2;
Chris@69 387 }
Chris@69 388 stages++;
Chris@69 389 } while (n > 1);
Chris@69 390 n = nbak;
Chris@69 391 /* Reverse the order to get the radix 4 at the end, so we can use the
Chris@69 392 fast degenerate case. It turns out that reversing the order also
Chris@69 393 improves the noise behaviour. */
Chris@69 394 for (i=0;i<stages/2;i++)
Chris@69 395 {
Chris@69 396 int tmp;
Chris@69 397 tmp = facbuf[2*i];
Chris@69 398 facbuf[2*i] = facbuf[2*(stages-i-1)];
Chris@69 399 facbuf[2*(stages-i-1)] = tmp;
Chris@69 400 }
Chris@69 401 for (i=0;i<stages;i++)
Chris@69 402 {
Chris@69 403 n /= facbuf[2*i];
Chris@69 404 facbuf[2*i+1] = n;
Chris@69 405 }
Chris@69 406 return 1;
Chris@69 407 }
Chris@69 408
Chris@69 409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
Chris@69 410 {
Chris@69 411 int i;
Chris@69 412 #ifdef FIXED_POINT
Chris@69 413 for (i=0;i<nfft;++i) {
Chris@69 414 opus_val32 phase = -i;
Chris@69 415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
Chris@69 416 }
Chris@69 417 #else
Chris@69 418 for (i=0;i<nfft;++i) {
Chris@69 419 const double pi=3.14159265358979323846264338327;
Chris@69 420 double phase = ( -2*pi /nfft ) * i;
Chris@69 421 kf_cexp(twiddles+i, phase );
Chris@69 422 }
Chris@69 423 #endif
Chris@69 424 }
Chris@69 425
Chris@69 426 int opus_fft_alloc_arch_c(kiss_fft_state *st) {
Chris@69 427 (void)st;
Chris@69 428 return 0;
Chris@69 429 }
Chris@69 430
Chris@69 431 /*
Chris@69 432 *
Chris@69 433 * Allocates all necessary storage space for the fft and ifft.
Chris@69 434 * The return value is a contiguous block of memory. As such,
Chris@69 435 * It can be freed with free().
Chris@69 436 * */
Chris@69 437 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
Chris@69 438 const kiss_fft_state *base, int arch)
Chris@69 439 {
Chris@69 440 kiss_fft_state *st=NULL;
Chris@69 441 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
Chris@69 442
Chris@69 443 if ( lenmem==NULL ) {
Chris@69 444 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
Chris@69 445 }else{
Chris@69 446 if (mem != NULL && *lenmem >= memneeded)
Chris@69 447 st = (kiss_fft_state*)mem;
Chris@69 448 *lenmem = memneeded;
Chris@69 449 }
Chris@69 450 if (st) {
Chris@69 451 opus_int16 *bitrev;
Chris@69 452 kiss_twiddle_cpx *twiddles;
Chris@69 453
Chris@69 454 st->nfft=nfft;
Chris@69 455 #ifdef FIXED_POINT
Chris@69 456 st->scale_shift = celt_ilog2(st->nfft);
Chris@69 457 if (st->nfft == 1<<st->scale_shift)
Chris@69 458 st->scale = Q15ONE;
Chris@69 459 else
Chris@69 460 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
Chris@69 461 #else
Chris@69 462 st->scale = 1.f/nfft;
Chris@69 463 #endif
Chris@69 464 if (base != NULL)
Chris@69 465 {
Chris@69 466 st->twiddles = base->twiddles;
Chris@69 467 st->shift = 0;
Chris@69 468 while (st->shift < 32 && nfft<<st->shift != base->nfft)
Chris@69 469 st->shift++;
Chris@69 470 if (st->shift>=32)
Chris@69 471 goto fail;
Chris@69 472 } else {
Chris@69 473 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
Chris@69 474 compute_twiddles(twiddles, nfft);
Chris@69 475 st->shift = -1;
Chris@69 476 }
Chris@69 477 if (!kf_factor(nfft,st->factors))
Chris@69 478 {
Chris@69 479 goto fail;
Chris@69 480 }
Chris@69 481
Chris@69 482 /* bitrev */
Chris@69 483 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
Chris@69 484 if (st->bitrev==NULL)
Chris@69 485 goto fail;
Chris@69 486 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
Chris@69 487
Chris@69 488 /* Initialize architecture specific fft parameters */
Chris@69 489 if (opus_fft_alloc_arch(st, arch))
Chris@69 490 goto fail;
Chris@69 491 }
Chris@69 492 return st;
Chris@69 493 fail:
Chris@69 494 opus_fft_free(st, arch);
Chris@69 495 return NULL;
Chris@69 496 }
Chris@69 497
Chris@69 498 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
Chris@69 499 {
Chris@69 500 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
Chris@69 501 }
Chris@69 502
Chris@69 503 void opus_fft_free_arch_c(kiss_fft_state *st) {
Chris@69 504 (void)st;
Chris@69 505 }
Chris@69 506
Chris@69 507 void opus_fft_free(const kiss_fft_state *cfg, int arch)
Chris@69 508 {
Chris@69 509 if (cfg)
Chris@69 510 {
Chris@69 511 opus_fft_free_arch((kiss_fft_state *)cfg, arch);
Chris@69 512 opus_free((opus_int16*)cfg->bitrev);
Chris@69 513 if (cfg->shift < 0)
Chris@69 514 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
Chris@69 515 opus_free((kiss_fft_state*)cfg);
Chris@69 516 }
Chris@69 517 }
Chris@69 518
Chris@69 519 #endif /* CUSTOM_MODES */
Chris@69 520
Chris@69 521 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
Chris@69 522 {
Chris@69 523 int m2, m;
Chris@69 524 int p;
Chris@69 525 int L;
Chris@69 526 int fstride[MAXFACTORS];
Chris@69 527 int i;
Chris@69 528 int shift;
Chris@69 529
Chris@69 530 /* st->shift can be -1 */
Chris@69 531 shift = st->shift>0 ? st->shift : 0;
Chris@69 532
Chris@69 533 fstride[0] = 1;
Chris@69 534 L=0;
Chris@69 535 do {
Chris@69 536 p = st->factors[2*L];
Chris@69 537 m = st->factors[2*L+1];
Chris@69 538 fstride[L+1] = fstride[L]*p;
Chris@69 539 L++;
Chris@69 540 } while(m!=1);
Chris@69 541 m = st->factors[2*L-1];
Chris@69 542 for (i=L-1;i>=0;i--)
Chris@69 543 {
Chris@69 544 if (i!=0)
Chris@69 545 m2 = st->factors[2*i-1];
Chris@69 546 else
Chris@69 547 m2 = 1;
Chris@69 548 switch (st->factors[2*i])
Chris@69 549 {
Chris@69 550 case 2:
Chris@69 551 kf_bfly2(fout, m, fstride[i]);
Chris@69 552 break;
Chris@69 553 case 4:
Chris@69 554 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
Chris@69 555 break;
Chris@69 556 #ifndef RADIX_TWO_ONLY
Chris@69 557 case 3:
Chris@69 558 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
Chris@69 559 break;
Chris@69 560 case 5:
Chris@69 561 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
Chris@69 562 break;
Chris@69 563 #endif
Chris@69 564 }
Chris@69 565 m = m2;
Chris@69 566 }
Chris@69 567 }
Chris@69 568
Chris@69 569 void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@69 570 {
Chris@69 571 int i;
Chris@69 572 opus_val16 scale;
Chris@69 573 #ifdef FIXED_POINT
Chris@69 574 /* Allows us to scale with MULT16_32_Q16(), which is faster than
Chris@69 575 MULT16_32_Q15() on ARM. */
Chris@69 576 int scale_shift = st->scale_shift-1;
Chris@69 577 #endif
Chris@69 578 scale = st->scale;
Chris@69 579
Chris@69 580 celt_assert2 (fin != fout, "In-place FFT not supported");
Chris@69 581 /* Bit-reverse the input */
Chris@69 582 for (i=0;i<st->nfft;i++)
Chris@69 583 {
Chris@69 584 kiss_fft_cpx x = fin[i];
Chris@69 585 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
Chris@69 586 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
Chris@69 587 }
Chris@69 588 opus_fft_impl(st, fout);
Chris@69 589 }
Chris@69 590
Chris@69 591
Chris@69 592 void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
Chris@69 593 {
Chris@69 594 int i;
Chris@69 595 celt_assert2 (fin != fout, "In-place FFT not supported");
Chris@69 596 /* Bit-reverse the input */
Chris@69 597 for (i=0;i<st->nfft;i++)
Chris@69 598 fout[st->bitrev[i]] = fin[i];
Chris@69 599 for (i=0;i<st->nfft;i++)
Chris@69 600 fout[i].i = -fout[i].i;
Chris@69 601 opus_fft_impl(st, fout);
Chris@69 602 for (i=0;i<st->nfft;i++)
Chris@69 603 fout[i].i = -fout[i].i;
Chris@69 604 }