Chris@69: /*Copyright (c) 2003-2004, Mark Borgerding Chris@69: Lots of modifications by Jean-Marc Valin Chris@69: Copyright (c) 2005-2007, Xiph.Org Foundation Chris@69: Copyright (c) 2008, Xiph.Org Foundation, CSIRO Chris@69: Chris@69: All rights reserved. Chris@69: Chris@69: Redistribution and use in source and binary forms, with or without Chris@69: modification, are permitted provided that the following conditions are met: Chris@69: Chris@69: * Redistributions of source code must retain the above copyright notice, Chris@69: this list of conditions and the following disclaimer. Chris@69: * Redistributions in binary form must reproduce the above copyright notice, Chris@69: this list of conditions and the following disclaimer in the Chris@69: documentation and/or other materials provided with the distribution. Chris@69: Chris@69: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" Chris@69: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE Chris@69: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE Chris@69: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE Chris@69: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR Chris@69: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF Chris@69: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS Chris@69: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN Chris@69: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) Chris@69: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE Chris@69: POSSIBILITY OF SUCH DAMAGE.*/ Chris@69: Chris@69: /* This code is originally from Mark Borgerding's KISS-FFT but has been Chris@69: heavily modified to better suit Opus */ Chris@69: Chris@69: #ifndef SKIP_CONFIG_H Chris@69: # ifdef HAVE_CONFIG_H Chris@69: # include "config.h" Chris@69: # endif Chris@69: #endif Chris@69: Chris@69: #include "_kiss_fft_guts.h" Chris@69: #include "arch.h" Chris@69: #include "os_support.h" Chris@69: #include "mathops.h" Chris@69: #include "stack_alloc.h" Chris@69: Chris@69: /* The guts header contains all the multiplication and addition macros that are defined for Chris@69: complex numbers. It also delares the kf_ internal functions. Chris@69: */ Chris@69: Chris@69: static void kf_bfly2( Chris@69: kiss_fft_cpx * Fout, Chris@69: int m, Chris@69: int N Chris@69: ) Chris@69: { Chris@69: kiss_fft_cpx * Fout2; Chris@69: int i; Chris@69: (void)m; Chris@69: #ifdef CUSTOM_MODES Chris@69: if (m==1) Chris@69: { Chris@69: celt_assert(m==1); Chris@69: for (i=0;itwiddles; Chris@69: /* m is guaranteed to be a multiple of 4. */ Chris@69: for (j=0;jtwiddles[fstride*m]; Chris@69: #endif Chris@69: for (i=0;itwiddles; Chris@69: /* For non-custom modes, m is guaranteed to be a multiple of 4. */ Chris@69: k=m; Chris@69: do { Chris@69: Chris@69: C_MUL(scratch[1],Fout[m] , *tw1); Chris@69: C_MUL(scratch[2],Fout[m2] , *tw2); Chris@69: Chris@69: C_ADD(scratch[3],scratch[1],scratch[2]); Chris@69: C_SUB(scratch[0],scratch[1],scratch[2]); Chris@69: tw1 += fstride; Chris@69: tw2 += fstride*2; Chris@69: Chris@69: Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); Chris@69: Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); Chris@69: Chris@69: C_MULBYSCALAR( scratch[0] , epi3.i ); Chris@69: Chris@69: C_ADDTO(*Fout,scratch[3]); Chris@69: Chris@69: Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); Chris@69: Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); Chris@69: Chris@69: Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); Chris@69: Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); Chris@69: Chris@69: ++Fout; Chris@69: } while(--k); Chris@69: } Chris@69: } Chris@69: Chris@69: Chris@69: #ifndef OVERRIDE_kf_bfly5 Chris@69: static void kf_bfly5( Chris@69: kiss_fft_cpx * Fout, Chris@69: const size_t fstride, Chris@69: const kiss_fft_state *st, Chris@69: int m, Chris@69: int N, Chris@69: int mm Chris@69: ) Chris@69: { Chris@69: kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; Chris@69: int i, u; Chris@69: kiss_fft_cpx scratch[13]; Chris@69: const kiss_twiddle_cpx *tw; Chris@69: kiss_twiddle_cpx ya,yb; Chris@69: kiss_fft_cpx * Fout_beg = Fout; Chris@69: Chris@69: #ifdef FIXED_POINT Chris@69: ya.r = 10126; Chris@69: ya.i = -31164; Chris@69: yb.r = -26510; Chris@69: yb.i = -19261; Chris@69: #else Chris@69: ya = st->twiddles[fstride*m]; Chris@69: yb = st->twiddles[fstride*2*m]; Chris@69: #endif Chris@69: tw=st->twiddles; Chris@69: Chris@69: for (i=0;ir = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); Chris@69: Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); Chris@69: Chris@69: scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); Chris@69: scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); Chris@69: Chris@69: scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); Chris@69: scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); Chris@69: Chris@69: C_SUB(*Fout1,scratch[5],scratch[6]); Chris@69: C_ADD(*Fout4,scratch[5],scratch[6]); Chris@69: Chris@69: scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); Chris@69: scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); Chris@69: scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); Chris@69: scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); Chris@69: Chris@69: C_ADD(*Fout2,scratch[11],scratch[12]); Chris@69: C_SUB(*Fout3,scratch[11],scratch[12]); Chris@69: Chris@69: ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; Chris@69: } Chris@69: } Chris@69: } Chris@69: #endif /* OVERRIDE_kf_bfly5 */ Chris@69: Chris@69: Chris@69: #endif Chris@69: Chris@69: Chris@69: #ifdef CUSTOM_MODES Chris@69: Chris@69: static Chris@69: void compute_bitrev_table( Chris@69: int Fout, Chris@69: opus_int16 *f, Chris@69: const size_t fstride, Chris@69: int in_stride, Chris@69: opus_int16 * factors, Chris@69: const kiss_fft_state *st Chris@69: ) Chris@69: { Chris@69: const int p=*factors++; /* the radix */ Chris@69: const int m=*factors++; /* stage's fft length/p */ Chris@69: Chris@69: /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ Chris@69: if (m==1) Chris@69: { Chris@69: int j; Chris@69: for (j=0;j32000 || (opus_int32)p*(opus_int32)p > n) Chris@69: p = n; /* no more factors, skip to end */ Chris@69: } Chris@69: n /= p; Chris@69: #ifdef RADIX_TWO_ONLY Chris@69: if (p!=2 && p != 4) Chris@69: #else Chris@69: if (p>5) Chris@69: #endif Chris@69: { Chris@69: return 0; Chris@69: } Chris@69: facbuf[2*stages] = p; Chris@69: if (p==2 && stages > 1) Chris@69: { Chris@69: facbuf[2*stages] = 4; Chris@69: facbuf[2] = 2; Chris@69: } Chris@69: stages++; Chris@69: } while (n > 1); Chris@69: n = nbak; Chris@69: /* Reverse the order to get the radix 4 at the end, so we can use the Chris@69: fast degenerate case. It turns out that reversing the order also Chris@69: improves the noise behaviour. */ Chris@69: for (i=0;i= memneeded) Chris@69: st = (kiss_fft_state*)mem; Chris@69: *lenmem = memneeded; Chris@69: } Chris@69: if (st) { Chris@69: opus_int16 *bitrev; Chris@69: kiss_twiddle_cpx *twiddles; Chris@69: Chris@69: st->nfft=nfft; Chris@69: #ifdef FIXED_POINT Chris@69: st->scale_shift = celt_ilog2(st->nfft); Chris@69: if (st->nfft == 1<scale_shift) Chris@69: st->scale = Q15ONE; Chris@69: else Chris@69: st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); Chris@69: #else Chris@69: st->scale = 1.f/nfft; Chris@69: #endif Chris@69: if (base != NULL) Chris@69: { Chris@69: st->twiddles = base->twiddles; Chris@69: st->shift = 0; Chris@69: while (st->shift < 32 && nfft<shift != base->nfft) Chris@69: st->shift++; Chris@69: if (st->shift>=32) Chris@69: goto fail; Chris@69: } else { Chris@69: st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); Chris@69: compute_twiddles(twiddles, nfft); Chris@69: st->shift = -1; Chris@69: } Chris@69: if (!kf_factor(nfft,st->factors)) Chris@69: { Chris@69: goto fail; Chris@69: } Chris@69: Chris@69: /* bitrev */ Chris@69: st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); Chris@69: if (st->bitrev==NULL) Chris@69: goto fail; Chris@69: compute_bitrev_table(0, bitrev, 1,1, st->factors,st); Chris@69: Chris@69: /* Initialize architecture specific fft parameters */ Chris@69: if (opus_fft_alloc_arch(st, arch)) Chris@69: goto fail; Chris@69: } Chris@69: return st; Chris@69: fail: Chris@69: opus_fft_free(st, arch); Chris@69: return NULL; Chris@69: } Chris@69: Chris@69: kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) Chris@69: { Chris@69: return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); Chris@69: } Chris@69: Chris@69: void opus_fft_free_arch_c(kiss_fft_state *st) { Chris@69: (void)st; Chris@69: } Chris@69: Chris@69: void opus_fft_free(const kiss_fft_state *cfg, int arch) Chris@69: { Chris@69: if (cfg) Chris@69: { Chris@69: opus_fft_free_arch((kiss_fft_state *)cfg, arch); Chris@69: opus_free((opus_int16*)cfg->bitrev); Chris@69: if (cfg->shift < 0) Chris@69: opus_free((kiss_twiddle_cpx*)cfg->twiddles); Chris@69: opus_free((kiss_fft_state*)cfg); Chris@69: } Chris@69: } Chris@69: Chris@69: #endif /* CUSTOM_MODES */ Chris@69: Chris@69: void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) Chris@69: { Chris@69: int m2, m; Chris@69: int p; Chris@69: int L; Chris@69: int fstride[MAXFACTORS]; Chris@69: int i; Chris@69: int shift; Chris@69: Chris@69: /* st->shift can be -1 */ Chris@69: shift = st->shift>0 ? st->shift : 0; Chris@69: Chris@69: fstride[0] = 1; Chris@69: L=0; Chris@69: do { Chris@69: p = st->factors[2*L]; Chris@69: m = st->factors[2*L+1]; Chris@69: fstride[L+1] = fstride[L]*p; Chris@69: L++; Chris@69: } while(m!=1); Chris@69: m = st->factors[2*L-1]; Chris@69: for (i=L-1;i>=0;i--) Chris@69: { Chris@69: if (i!=0) Chris@69: m2 = st->factors[2*i-1]; Chris@69: else Chris@69: m2 = 1; Chris@69: switch (st->factors[2*i]) Chris@69: { Chris@69: case 2: Chris@69: kf_bfly2(fout, m, fstride[i]); Chris@69: break; Chris@69: case 4: Chris@69: kf_bfly4(fout,fstride[i]<scale_shift-1; Chris@69: #endif Chris@69: scale = st->scale; Chris@69: Chris@69: celt_assert2 (fin != fout, "In-place FFT not supported"); Chris@69: /* Bit-reverse the input */ Chris@69: for (i=0;infft;i++) Chris@69: { Chris@69: kiss_fft_cpx x = fin[i]; Chris@69: fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); Chris@69: fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); Chris@69: } Chris@69: opus_fft_impl(st, fout); Chris@69: } Chris@69: Chris@69: Chris@69: void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) Chris@69: { Chris@69: int i; Chris@69: celt_assert2 (fin != fout, "In-place FFT not supported"); Chris@69: /* Bit-reverse the input */ Chris@69: for (i=0;infft;i++) Chris@69: fout[st->bitrev[i]] = fin[i]; Chris@69: for (i=0;infft;i++) Chris@69: fout[i].i = -fout[i].i; Chris@69: opus_fft_impl(st, fout); Chris@69: for (i=0;infft;i++) Chris@69: fout[i].i = -fout[i].i; Chris@69: }