cannam@154: /* Copyright (c) 2007-2008 CSIRO cannam@154: Copyright (c) 2007-2008 Xiph.Org Foundation cannam@154: Written by Jean-Marc Valin */ cannam@154: /* cannam@154: Redistribution and use in source and binary forms, with or without cannam@154: modification, are permitted provided that the following conditions cannam@154: are met: cannam@154: cannam@154: - Redistributions of source code must retain the above copyright cannam@154: notice, this list of conditions and the following disclaimer. cannam@154: cannam@154: - Redistributions in binary form must reproduce the above copyright cannam@154: notice, this list of conditions and the following disclaimer in the cannam@154: documentation and/or other materials provided with the distribution. cannam@154: cannam@154: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS cannam@154: ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT cannam@154: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR cannam@154: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER cannam@154: OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, cannam@154: EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, cannam@154: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR cannam@154: PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF cannam@154: LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING cannam@154: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS cannam@154: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cannam@154: */ cannam@154: cannam@154: /* This is a simple MDCT implementation that uses a N/4 complex FFT cannam@154: to do most of the work. It should be relatively straightforward to cannam@154: plug in pretty much and FFT here. cannam@154: cannam@154: This replaces the Vorbis FFT (and uses the exact same API), which cannam@154: was a bit too messy and that was ending up duplicating code cannam@154: (might as well use the same FFT everywhere). cannam@154: cannam@154: The algorithm is similar to (and inspired from) Fabrice Bellard's cannam@154: MDCT implementation in FFMPEG, but has differences in signs, ordering cannam@154: and scaling in many places. cannam@154: */ cannam@154: cannam@154: #ifndef SKIP_CONFIG_H cannam@154: #ifdef HAVE_CONFIG_H cannam@154: #include "config.h" cannam@154: #endif cannam@154: #endif cannam@154: cannam@154: #include "mdct.h" cannam@154: #include "kiss_fft.h" cannam@154: #include "_kiss_fft_guts.h" cannam@154: #include cannam@154: #include "os_support.h" cannam@154: #include "mathops.h" cannam@154: #include "stack_alloc.h" cannam@154: cannam@154: #if defined(MIPSr1_ASM) cannam@154: #include "mips/mdct_mipsr1.h" cannam@154: #endif cannam@154: cannam@154: cannam@154: #ifdef CUSTOM_MODES cannam@154: cannam@154: int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) cannam@154: { cannam@154: int i; cannam@154: kiss_twiddle_scalar *trig; cannam@154: int shift; cannam@154: int N2=N>>1; cannam@154: l->n = N; cannam@154: l->maxshift = maxshift; cannam@154: for (i=0;i<=maxshift;i++) cannam@154: { cannam@154: if (i==0) cannam@154: l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); cannam@154: else cannam@154: l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); cannam@154: #ifndef ENABLE_TI_DSPLIB55 cannam@154: if (l->kfft[i]==NULL) cannam@154: return 0; cannam@154: #endif cannam@154: } cannam@154: l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); cannam@154: if (l->trig==NULL) cannam@154: return 0; cannam@154: for (shift=0;shift<=maxshift;shift++) cannam@154: { cannam@154: /* We have enough points that sine isn't necessary */ cannam@154: #if defined(FIXED_POINT) cannam@154: #if 1 cannam@154: for (i=0;i>= 1; cannam@154: N >>= 1; cannam@154: } cannam@154: return 1; cannam@154: } cannam@154: cannam@154: void clt_mdct_clear(mdct_lookup *l, int arch) cannam@154: { cannam@154: int i; cannam@154: for (i=0;i<=l->maxshift;i++) cannam@154: opus_fft_free(l->kfft[i], arch); cannam@154: opus_free((kiss_twiddle_scalar*)l->trig); cannam@154: } cannam@154: cannam@154: #endif /* CUSTOM_MODES */ cannam@154: cannam@154: /* Forward MDCT trashes the input array */ cannam@154: #ifndef OVERRIDE_clt_mdct_forward cannam@154: void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, cannam@154: const opus_val16 *window, int overlap, int shift, int stride, int arch) cannam@154: { cannam@154: int i; cannam@154: int N, N2, N4; cannam@154: VARDECL(kiss_fft_scalar, f); cannam@154: VARDECL(kiss_fft_cpx, f2); cannam@154: const kiss_fft_state *st = l->kfft[shift]; cannam@154: const kiss_twiddle_scalar *trig; cannam@154: opus_val16 scale; cannam@154: #ifdef FIXED_POINT cannam@154: /* Allows us to scale with MULT16_32_Q16(), which is faster than cannam@154: MULT16_32_Q15() on ARM. */ cannam@154: int scale_shift = st->scale_shift-1; cannam@154: #endif cannam@154: SAVE_STACK; cannam@154: (void)arch; cannam@154: scale = st->scale; cannam@154: cannam@154: N = l->n; cannam@154: trig = l->trig; cannam@154: for (i=0;i>= 1; cannam@154: trig += N; cannam@154: } cannam@154: N2 = N>>1; cannam@154: N4 = N>>2; cannam@154: cannam@154: ALLOC(f, N2, kiss_fft_scalar); cannam@154: ALLOC(f2, N4, kiss_fft_cpx); cannam@154: cannam@154: /* Consider the input to be composed of four blocks: [a, b, c, d] */ cannam@154: /* Window, shuffle, fold */ cannam@154: { cannam@154: /* Temp pointers to make it really clear to the compiler what we're doing */ cannam@154: const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); cannam@154: const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); cannam@154: kiss_fft_scalar * OPUS_RESTRICT yp = f; cannam@154: const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); cannam@154: const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; cannam@154: for(i=0;i<((overlap+3)>>2);i++) cannam@154: { cannam@154: /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ cannam@154: *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); cannam@154: *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); cannam@154: xp1+=2; cannam@154: xp2-=2; cannam@154: wp1+=2; cannam@154: wp2-=2; cannam@154: } cannam@154: wp1 = window; cannam@154: wp2 = window+overlap-1; cannam@154: for(;i>2);i++) cannam@154: { cannam@154: /* Real part arranged as a-bR, Imag part arranged as -c-dR */ cannam@154: *yp++ = *xp2; cannam@154: *yp++ = *xp1; cannam@154: xp1+=2; cannam@154: xp2-=2; cannam@154: } cannam@154: for(;ibitrev[i]] = yc; cannam@154: } cannam@154: } cannam@154: cannam@154: /* N/4 complex FFT, does not downscale anymore */ cannam@154: opus_fft_impl(st, f2); cannam@154: cannam@154: /* Post-rotate */ cannam@154: { cannam@154: /* Temp pointers to make it really clear to the compiler what we're doing */ cannam@154: const kiss_fft_cpx * OPUS_RESTRICT fp = f2; cannam@154: kiss_fft_scalar * OPUS_RESTRICT yp1 = out; cannam@154: kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); cannam@154: const kiss_twiddle_scalar *t = &trig[0]; cannam@154: /* Temp pointers to make it really clear to the compiler what we're doing */ cannam@154: for(i=0;ii,t[N4+i]) - S_MUL(fp->r,t[i]); cannam@154: yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); cannam@154: *yp1 = yr; cannam@154: *yp2 = yi; cannam@154: fp++; cannam@154: yp1 += 2*stride; cannam@154: yp2 -= 2*stride; cannam@154: } cannam@154: } cannam@154: RESTORE_STACK; cannam@154: } cannam@154: #endif /* OVERRIDE_clt_mdct_forward */ cannam@154: cannam@154: #ifndef OVERRIDE_clt_mdct_backward cannam@154: void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, cannam@154: const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) cannam@154: { cannam@154: int i; cannam@154: int N, N2, N4; cannam@154: const kiss_twiddle_scalar *trig; cannam@154: (void) arch; cannam@154: cannam@154: N = l->n; cannam@154: trig = l->trig; cannam@154: for (i=0;i>= 1; cannam@154: trig += N; cannam@154: } cannam@154: N2 = N>>1; cannam@154: N4 = N>>2; cannam@154: cannam@154: /* Pre-rotate */ cannam@154: { cannam@154: /* Temp pointers to make it really clear to the compiler what we're doing */ cannam@154: const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; cannam@154: const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); cannam@154: kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); cannam@154: const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; cannam@154: const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; cannam@154: for(i=0;ikfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); cannam@154: cannam@154: /* Post-rotate and de-shuffle from both ends of the buffer at once to make cannam@154: it in-place. */ cannam@154: { cannam@154: kiss_fft_scalar * yp0 = out+(overlap>>1); cannam@154: kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; cannam@154: const kiss_twiddle_scalar *t = &trig[0]; cannam@154: /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the cannam@154: middle pair will be computed twice. */ cannam@154: for(i=0;i<(N4+1)>>1;i++) cannam@154: { cannam@154: kiss_fft_scalar re, im, yr, yi; cannam@154: kiss_twiddle_scalar t0, t1; cannam@154: /* We swap real and imag because we're using an FFT instead of an IFFT. */ cannam@154: re = yp0[1]; cannam@154: im = yp0[0]; cannam@154: t0 = t[i]; cannam@154: t1 = t[N4+i]; cannam@154: /* We'd scale up by 2 here, but instead it's done when mixing the windows */ cannam@154: yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)); cannam@154: yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)); cannam@154: /* We swap real and imag because we're using an FFT instead of an IFFT. */ cannam@154: re = yp1[1]; cannam@154: im = yp1[0]; cannam@154: yp0[0] = yr; cannam@154: yp1[1] = yi; cannam@154: cannam@154: t0 = t[(N4-i-1)]; cannam@154: t1 = t[(N2-i-1)]; cannam@154: /* We'd scale up by 2 here, but instead it's done when mixing the windows */ cannam@154: yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)); cannam@154: yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)); cannam@154: yp1[0] = yr; cannam@154: yp0[1] = yi; cannam@154: yp0 += 2; cannam@154: yp1 -= 2; cannam@154: } cannam@154: } cannam@154: cannam@154: /* Mirror on both sides for TDAC */ cannam@154: { cannam@154: kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; cannam@154: kiss_fft_scalar * OPUS_RESTRICT yp1 = out; cannam@154: const opus_val16 * OPUS_RESTRICT wp1 = window; cannam@154: const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; cannam@154: cannam@154: for(i = 0; i < overlap/2; i++) cannam@154: { cannam@154: kiss_fft_scalar x1, x2; cannam@154: x1 = *xp1; cannam@154: x2 = *yp1; cannam@154: *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1)); cannam@154: *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1)); cannam@154: wp1++; cannam@154: wp2--; cannam@154: } cannam@154: } cannam@154: } cannam@154: #endif /* OVERRIDE_clt_mdct_backward */