cannam@154: /*Copyright (c) 2003-2004, Mark Borgerding cannam@154: Lots of modifications by Jean-Marc Valin cannam@154: Copyright (c) 2005-2007, Xiph.Org Foundation cannam@154: Copyright (c) 2008, Xiph.Org Foundation, CSIRO cannam@154: cannam@154: All rights reserved. cannam@154: cannam@154: Redistribution and use in source and binary forms, with or without cannam@154: modification, are permitted provided that the following conditions are met: cannam@154: cannam@154: * Redistributions of source code must retain the above copyright notice, cannam@154: this list of conditions and the following disclaimer. cannam@154: * Redistributions in binary form must reproduce the above copyright notice, cannam@154: this list of conditions and the following disclaimer in the cannam@154: documentation and/or other materials provided with the distribution. cannam@154: cannam@154: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" cannam@154: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE cannam@154: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE cannam@154: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE cannam@154: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR cannam@154: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF cannam@154: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS cannam@154: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN cannam@154: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) cannam@154: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE cannam@154: POSSIBILITY OF SUCH DAMAGE.*/ cannam@154: cannam@154: /* This code is originally from Mark Borgerding's KISS-FFT but has been cannam@154: heavily modified to better suit Opus */ cannam@154: cannam@154: #ifndef SKIP_CONFIG_H cannam@154: # ifdef HAVE_CONFIG_H cannam@154: # include "config.h" cannam@154: # endif cannam@154: #endif cannam@154: cannam@154: #include "_kiss_fft_guts.h" cannam@154: #include "arch.h" cannam@154: #include "os_support.h" cannam@154: #include "mathops.h" cannam@154: #include "stack_alloc.h" cannam@154: cannam@154: /* The guts header contains all the multiplication and addition macros that are defined for cannam@154: complex numbers. It also delares the kf_ internal functions. cannam@154: */ cannam@154: cannam@154: static void kf_bfly2( cannam@154: kiss_fft_cpx * Fout, cannam@154: int m, cannam@154: int N cannam@154: ) cannam@154: { cannam@154: kiss_fft_cpx * Fout2; cannam@154: int i; cannam@154: (void)m; cannam@154: #ifdef CUSTOM_MODES cannam@154: if (m==1) cannam@154: { cannam@154: celt_assert(m==1); cannam@154: for (i=0;itwiddles; cannam@154: /* m is guaranteed to be a multiple of 4. */ cannam@154: for (j=0;jtwiddles[fstride*m]; cannam@154: #endif cannam@154: for (i=0;itwiddles; cannam@154: /* For non-custom modes, m is guaranteed to be a multiple of 4. */ cannam@154: k=m; cannam@154: do { cannam@154: cannam@154: C_MUL(scratch[1],Fout[m] , *tw1); cannam@154: C_MUL(scratch[2],Fout[m2] , *tw2); cannam@154: cannam@154: C_ADD(scratch[3],scratch[1],scratch[2]); cannam@154: C_SUB(scratch[0],scratch[1],scratch[2]); cannam@154: tw1 += fstride; cannam@154: tw2 += fstride*2; cannam@154: cannam@154: Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); cannam@154: Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); cannam@154: cannam@154: C_MULBYSCALAR( scratch[0] , epi3.i ); cannam@154: cannam@154: C_ADDTO(*Fout,scratch[3]); cannam@154: cannam@154: Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); cannam@154: Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); cannam@154: cannam@154: Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); cannam@154: Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); cannam@154: cannam@154: ++Fout; cannam@154: } while(--k); cannam@154: } cannam@154: } cannam@154: cannam@154: cannam@154: #ifndef OVERRIDE_kf_bfly5 cannam@154: static void kf_bfly5( cannam@154: kiss_fft_cpx * Fout, cannam@154: const size_t fstride, cannam@154: const kiss_fft_state *st, cannam@154: int m, cannam@154: int N, cannam@154: int mm cannam@154: ) cannam@154: { cannam@154: kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; cannam@154: int i, u; cannam@154: kiss_fft_cpx scratch[13]; cannam@154: const kiss_twiddle_cpx *tw; cannam@154: kiss_twiddle_cpx ya,yb; cannam@154: kiss_fft_cpx * Fout_beg = Fout; cannam@154: cannam@154: #ifdef FIXED_POINT cannam@154: ya.r = 10126; cannam@154: ya.i = -31164; cannam@154: yb.r = -26510; cannam@154: yb.i = -19261; cannam@154: #else cannam@154: ya = st->twiddles[fstride*m]; cannam@154: yb = st->twiddles[fstride*2*m]; cannam@154: #endif cannam@154: tw=st->twiddles; cannam@154: cannam@154: for (i=0;ir = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); cannam@154: Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); cannam@154: cannam@154: scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); cannam@154: scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); cannam@154: cannam@154: scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); cannam@154: scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); cannam@154: cannam@154: C_SUB(*Fout1,scratch[5],scratch[6]); cannam@154: C_ADD(*Fout4,scratch[5],scratch[6]); cannam@154: cannam@154: scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); cannam@154: scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); cannam@154: scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); cannam@154: scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); cannam@154: cannam@154: C_ADD(*Fout2,scratch[11],scratch[12]); cannam@154: C_SUB(*Fout3,scratch[11],scratch[12]); cannam@154: cannam@154: ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; cannam@154: } cannam@154: } cannam@154: } cannam@154: #endif /* OVERRIDE_kf_bfly5 */ cannam@154: cannam@154: cannam@154: #endif cannam@154: cannam@154: cannam@154: #ifdef CUSTOM_MODES cannam@154: cannam@154: static cannam@154: void compute_bitrev_table( cannam@154: int Fout, cannam@154: opus_int16 *f, cannam@154: const size_t fstride, cannam@154: int in_stride, cannam@154: opus_int16 * factors, cannam@154: const kiss_fft_state *st cannam@154: ) cannam@154: { cannam@154: const int p=*factors++; /* the radix */ cannam@154: const int m=*factors++; /* stage's fft length/p */ cannam@154: cannam@154: /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ cannam@154: if (m==1) cannam@154: { cannam@154: int j; cannam@154: for (j=0;j32000 || (opus_int32)p*(opus_int32)p > n) cannam@154: p = n; /* no more factors, skip to end */ cannam@154: } cannam@154: n /= p; cannam@154: #ifdef RADIX_TWO_ONLY cannam@154: if (p!=2 && p != 4) cannam@154: #else cannam@154: if (p>5) cannam@154: #endif cannam@154: { cannam@154: return 0; cannam@154: } cannam@154: facbuf[2*stages] = p; cannam@154: if (p==2 && stages > 1) cannam@154: { cannam@154: facbuf[2*stages] = 4; cannam@154: facbuf[2] = 2; cannam@154: } cannam@154: stages++; cannam@154: } while (n > 1); cannam@154: n = nbak; cannam@154: /* Reverse the order to get the radix 4 at the end, so we can use the cannam@154: fast degenerate case. It turns out that reversing the order also cannam@154: improves the noise behaviour. */ cannam@154: for (i=0;i= memneeded) cannam@154: st = (kiss_fft_state*)mem; cannam@154: *lenmem = memneeded; cannam@154: } cannam@154: if (st) { cannam@154: opus_int16 *bitrev; cannam@154: kiss_twiddle_cpx *twiddles; cannam@154: cannam@154: st->nfft=nfft; cannam@154: #ifdef FIXED_POINT cannam@154: st->scale_shift = celt_ilog2(st->nfft); cannam@154: if (st->nfft == 1<scale_shift) cannam@154: st->scale = Q15ONE; cannam@154: else cannam@154: st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); cannam@154: #else cannam@154: st->scale = 1.f/nfft; cannam@154: #endif cannam@154: if (base != NULL) cannam@154: { cannam@154: st->twiddles = base->twiddles; cannam@154: st->shift = 0; cannam@154: while (st->shift < 32 && nfft<shift != base->nfft) cannam@154: st->shift++; cannam@154: if (st->shift>=32) cannam@154: goto fail; cannam@154: } else { cannam@154: st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); cannam@154: compute_twiddles(twiddles, nfft); cannam@154: st->shift = -1; cannam@154: } cannam@154: if (!kf_factor(nfft,st->factors)) cannam@154: { cannam@154: goto fail; cannam@154: } cannam@154: cannam@154: /* bitrev */ cannam@154: st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); cannam@154: if (st->bitrev==NULL) cannam@154: goto fail; cannam@154: compute_bitrev_table(0, bitrev, 1,1, st->factors,st); cannam@154: cannam@154: /* Initialize architecture specific fft parameters */ cannam@154: if (opus_fft_alloc_arch(st, arch)) cannam@154: goto fail; cannam@154: } cannam@154: return st; cannam@154: fail: cannam@154: opus_fft_free(st, arch); cannam@154: return NULL; cannam@154: } cannam@154: cannam@154: kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) cannam@154: { cannam@154: return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); cannam@154: } cannam@154: cannam@154: void opus_fft_free_arch_c(kiss_fft_state *st) { cannam@154: (void)st; cannam@154: } cannam@154: cannam@154: void opus_fft_free(const kiss_fft_state *cfg, int arch) cannam@154: { cannam@154: if (cfg) cannam@154: { cannam@154: opus_fft_free_arch((kiss_fft_state *)cfg, arch); cannam@154: opus_free((opus_int16*)cfg->bitrev); cannam@154: if (cfg->shift < 0) cannam@154: opus_free((kiss_twiddle_cpx*)cfg->twiddles); cannam@154: opus_free((kiss_fft_state*)cfg); cannam@154: } cannam@154: } cannam@154: cannam@154: #endif /* CUSTOM_MODES */ cannam@154: cannam@154: void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) cannam@154: { cannam@154: int m2, m; cannam@154: int p; cannam@154: int L; cannam@154: int fstride[MAXFACTORS]; cannam@154: int i; cannam@154: int shift; cannam@154: cannam@154: /* st->shift can be -1 */ cannam@154: shift = st->shift>0 ? st->shift : 0; cannam@154: cannam@154: fstride[0] = 1; cannam@154: L=0; cannam@154: do { cannam@154: p = st->factors[2*L]; cannam@154: m = st->factors[2*L+1]; cannam@154: fstride[L+1] = fstride[L]*p; cannam@154: L++; cannam@154: } while(m!=1); cannam@154: m = st->factors[2*L-1]; cannam@154: for (i=L-1;i>=0;i--) cannam@154: { cannam@154: if (i!=0) cannam@154: m2 = st->factors[2*i-1]; cannam@154: else cannam@154: m2 = 1; cannam@154: switch (st->factors[2*i]) cannam@154: { cannam@154: case 2: cannam@154: kf_bfly2(fout, m, fstride[i]); cannam@154: break; cannam@154: case 4: cannam@154: kf_bfly4(fout,fstride[i]<scale_shift-1; cannam@154: #endif cannam@154: scale = st->scale; cannam@154: cannam@154: celt_assert2 (fin != fout, "In-place FFT not supported"); cannam@154: /* Bit-reverse the input */ cannam@154: for (i=0;infft;i++) cannam@154: { cannam@154: kiss_fft_cpx x = fin[i]; cannam@154: fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); cannam@154: fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); cannam@154: } cannam@154: opus_fft_impl(st, fout); cannam@154: } cannam@154: cannam@154: cannam@154: void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) cannam@154: { cannam@154: int i; cannam@154: celt_assert2 (fin != fout, "In-place FFT not supported"); cannam@154: /* Bit-reverse the input */ cannam@154: for (i=0;infft;i++) cannam@154: fout[st->bitrev[i]] = fin[i]; cannam@154: for (i=0;infft;i++) cannam@154: fout[i].i = -fout[i].i; cannam@154: opus_fft_impl(st, fout); cannam@154: for (i=0;infft;i++) cannam@154: fout[i].i = -fout[i].i; cannam@154: }