annotate src/opus-1.3/celt/mips/mdct_mipsr1.h @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /* Copyright (c) 2007-2008 CSIRO
Chris@69 2 Copyright (c) 2007-2008 Xiph.Org Foundation
Chris@69 3 Written by Jean-Marc Valin */
Chris@69 4 /*
Chris@69 5 Redistribution and use in source and binary forms, with or without
Chris@69 6 modification, are permitted provided that the following conditions
Chris@69 7 are met:
Chris@69 8
Chris@69 9 - Redistributions of source code must retain the above copyright
Chris@69 10 notice, this list of conditions and the following disclaimer.
Chris@69 11
Chris@69 12 - Redistributions in binary form must reproduce the above copyright
Chris@69 13 notice, this list of conditions and the following disclaimer in the
Chris@69 14 documentation and/or other materials provided with the distribution.
Chris@69 15
Chris@69 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Chris@69 17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Chris@69 18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Chris@69 19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
Chris@69 20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Chris@69 21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
Chris@69 22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
Chris@69 23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
Chris@69 24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
Chris@69 25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
Chris@69 26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@69 27 */
Chris@69 28
Chris@69 29 /* This is a simple MDCT implementation that uses a N/4 complex FFT
Chris@69 30 to do most of the work. It should be relatively straightforward to
Chris@69 31 plug in pretty much and FFT here.
Chris@69 32
Chris@69 33 This replaces the Vorbis FFT (and uses the exact same API), which
Chris@69 34 was a bit too messy and that was ending up duplicating code
Chris@69 35 (might as well use the same FFT everywhere).
Chris@69 36
Chris@69 37 The algorithm is similar to (and inspired from) Fabrice Bellard's
Chris@69 38 MDCT implementation in FFMPEG, but has differences in signs, ordering
Chris@69 39 and scaling in many places.
Chris@69 40 */
Chris@69 41 #ifndef __MDCT_MIPSR1_H__
Chris@69 42 #define __MDCT_MIPSR1_H__
Chris@69 43
Chris@69 44 #ifndef SKIP_CONFIG_H
Chris@69 45 #ifdef HAVE_CONFIG_H
Chris@69 46 #include "config.h"
Chris@69 47 #endif
Chris@69 48 #endif
Chris@69 49
Chris@69 50 #include "mdct.h"
Chris@69 51 #include "kiss_fft.h"
Chris@69 52 #include "_kiss_fft_guts.h"
Chris@69 53 #include <math.h>
Chris@69 54 #include "os_support.h"
Chris@69 55 #include "mathops.h"
Chris@69 56 #include "stack_alloc.h"
Chris@69 57
Chris@69 58 /* Forward MDCT trashes the input array */
Chris@69 59 #define OVERRIDE_clt_mdct_forward
Chris@69 60 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
Chris@69 61 const opus_val16 *window, int overlap, int shift, int stride, int arch)
Chris@69 62 {
Chris@69 63 int i;
Chris@69 64 int N, N2, N4;
Chris@69 65 VARDECL(kiss_fft_scalar, f);
Chris@69 66 VARDECL(kiss_fft_cpx, f2);
Chris@69 67 const kiss_fft_state *st = l->kfft[shift];
Chris@69 68 const kiss_twiddle_scalar *trig;
Chris@69 69 opus_val16 scale;
Chris@69 70 #ifdef FIXED_POINT
Chris@69 71 /* Allows us to scale with MULT16_32_Q16(), which is faster than
Chris@69 72 MULT16_32_Q15() on ARM. */
Chris@69 73 int scale_shift = st->scale_shift-1;
Chris@69 74 #endif
Chris@69 75
Chris@69 76 (void)arch;
Chris@69 77
Chris@69 78 SAVE_STACK;
Chris@69 79 scale = st->scale;
Chris@69 80
Chris@69 81 N = l->n;
Chris@69 82 trig = l->trig;
Chris@69 83 for (i=0;i<shift;i++)
Chris@69 84 {
Chris@69 85 N >>= 1;
Chris@69 86 trig += N;
Chris@69 87 }
Chris@69 88 N2 = N>>1;
Chris@69 89 N4 = N>>2;
Chris@69 90
Chris@69 91 ALLOC(f, N2, kiss_fft_scalar);
Chris@69 92 ALLOC(f2, N4, kiss_fft_cpx);
Chris@69 93
Chris@69 94 /* Consider the input to be composed of four blocks: [a, b, c, d] */
Chris@69 95 /* Window, shuffle, fold */
Chris@69 96 {
Chris@69 97 /* Temp pointers to make it really clear to the compiler what we're doing */
Chris@69 98 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
Chris@69 99 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
Chris@69 100 kiss_fft_scalar * OPUS_RESTRICT yp = f;
Chris@69 101 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
Chris@69 102 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
Chris@69 103 for(i=0;i<((overlap+3)>>2);i++)
Chris@69 104 {
Chris@69 105 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
Chris@69 106 *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
Chris@69 107 *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
Chris@69 108 xp1+=2;
Chris@69 109 xp2-=2;
Chris@69 110 wp1+=2;
Chris@69 111 wp2-=2;
Chris@69 112 }
Chris@69 113 wp1 = window;
Chris@69 114 wp2 = window+overlap-1;
Chris@69 115 for(;i<N4-((overlap+3)>>2);i++)
Chris@69 116 {
Chris@69 117 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
Chris@69 118 *yp++ = *xp2;
Chris@69 119 *yp++ = *xp1;
Chris@69 120 xp1+=2;
Chris@69 121 xp2-=2;
Chris@69 122 }
Chris@69 123 for(;i<N4;i++)
Chris@69 124 {
Chris@69 125 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
Chris@69 126 *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
Chris@69 127 *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
Chris@69 128 xp1+=2;
Chris@69 129 xp2-=2;
Chris@69 130 wp1+=2;
Chris@69 131 wp2-=2;
Chris@69 132 }
Chris@69 133 }
Chris@69 134 /* Pre-rotation */
Chris@69 135 {
Chris@69 136 kiss_fft_scalar * OPUS_RESTRICT yp = f;
Chris@69 137 const kiss_twiddle_scalar *t = &trig[0];
Chris@69 138 for(i=0;i<N4;i++)
Chris@69 139 {
Chris@69 140 kiss_fft_cpx yc;
Chris@69 141 kiss_twiddle_scalar t0, t1;
Chris@69 142 kiss_fft_scalar re, im, yr, yi;
Chris@69 143 t0 = t[i];
Chris@69 144 t1 = t[N4+i];
Chris@69 145 re = *yp++;
Chris@69 146 im = *yp++;
Chris@69 147
Chris@69 148 yr = S_MUL_SUB(re,t0,im,t1);
Chris@69 149 yi = S_MUL_ADD(im,t0,re,t1);
Chris@69 150
Chris@69 151 yc.r = yr;
Chris@69 152 yc.i = yi;
Chris@69 153 yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
Chris@69 154 yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
Chris@69 155 f2[st->bitrev[i]] = yc;
Chris@69 156 }
Chris@69 157 }
Chris@69 158
Chris@69 159 /* N/4 complex FFT, does not downscale anymore */
Chris@69 160 opus_fft_impl(st, f2);
Chris@69 161
Chris@69 162 /* Post-rotate */
Chris@69 163 {
Chris@69 164 /* Temp pointers to make it really clear to the compiler what we're doing */
Chris@69 165 const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
Chris@69 166 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
Chris@69 167 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
Chris@69 168 const kiss_twiddle_scalar *t = &trig[0];
Chris@69 169 /* Temp pointers to make it really clear to the compiler what we're doing */
Chris@69 170 for(i=0;i<N4;i++)
Chris@69 171 {
Chris@69 172 kiss_fft_scalar yr, yi;
Chris@69 173 yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
Chris@69 174 yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
Chris@69 175 *yp1 = yr;
Chris@69 176 *yp2 = yi;
Chris@69 177 fp++;
Chris@69 178 yp1 += 2*stride;
Chris@69 179 yp2 -= 2*stride;
Chris@69 180 }
Chris@69 181 }
Chris@69 182 RESTORE_STACK;
Chris@69 183 }
Chris@69 184
Chris@69 185 #define OVERRIDE_clt_mdct_backward
Chris@69 186 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
Chris@69 187 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
Chris@69 188 {
Chris@69 189 int i;
Chris@69 190 int N, N2, N4;
Chris@69 191 const kiss_twiddle_scalar *trig;
Chris@69 192
Chris@69 193 (void)arch;
Chris@69 194
Chris@69 195 N = l->n;
Chris@69 196 trig = l->trig;
Chris@69 197 for (i=0;i<shift;i++)
Chris@69 198 {
Chris@69 199 N >>= 1;
Chris@69 200 trig += N;
Chris@69 201 }
Chris@69 202 N2 = N>>1;
Chris@69 203 N4 = N>>2;
Chris@69 204
Chris@69 205 /* Pre-rotate */
Chris@69 206 {
Chris@69 207 /* Temp pointers to make it really clear to the compiler what we're doing */
Chris@69 208 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
Chris@69 209 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
Chris@69 210 kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
Chris@69 211 const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
Chris@69 212 const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
Chris@69 213 for(i=0;i<N4;i++)
Chris@69 214 {
Chris@69 215 int rev;
Chris@69 216 kiss_fft_scalar yr, yi;
Chris@69 217 rev = *bitrev++;
Chris@69 218 yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
Chris@69 219 yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
Chris@69 220 /* We swap real and imag because we use an FFT instead of an IFFT. */
Chris@69 221 yp[2*rev+1] = yr;
Chris@69 222 yp[2*rev] = yi;
Chris@69 223 /* Storing the pre-rotation directly in the bitrev order. */
Chris@69 224 xp1+=2*stride;
Chris@69 225 xp2-=2*stride;
Chris@69 226 }
Chris@69 227 }
Chris@69 228
Chris@69 229 opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
Chris@69 230
Chris@69 231 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
Chris@69 232 it in-place. */
Chris@69 233 {
Chris@69 234 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
Chris@69 235 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
Chris@69 236 const kiss_twiddle_scalar *t = &trig[0];
Chris@69 237 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
Chris@69 238 middle pair will be computed twice. */
Chris@69 239 for(i=0;i<(N4+1)>>1;i++)
Chris@69 240 {
Chris@69 241 kiss_fft_scalar re, im, yr, yi;
Chris@69 242 kiss_twiddle_scalar t0, t1;
Chris@69 243 /* We swap real and imag because we're using an FFT instead of an IFFT. */
Chris@69 244 re = yp0[1];
Chris@69 245 im = yp0[0];
Chris@69 246 t0 = t[i];
Chris@69 247 t1 = t[N4+i];
Chris@69 248 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
Chris@69 249 yr = S_MUL_ADD(re,t0 , im,t1);
Chris@69 250 yi = S_MUL_SUB(re,t1 , im,t0);
Chris@69 251 /* We swap real and imag because we're using an FFT instead of an IFFT. */
Chris@69 252 re = yp1[1];
Chris@69 253 im = yp1[0];
Chris@69 254 yp0[0] = yr;
Chris@69 255 yp1[1] = yi;
Chris@69 256
Chris@69 257 t0 = t[(N4-i-1)];
Chris@69 258 t1 = t[(N2-i-1)];
Chris@69 259 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
Chris@69 260 yr = S_MUL_ADD(re,t0,im,t1);
Chris@69 261 yi = S_MUL_SUB(re,t1,im,t0);
Chris@69 262 yp1[0] = yr;
Chris@69 263 yp0[1] = yi;
Chris@69 264 yp0 += 2;
Chris@69 265 yp1 -= 2;
Chris@69 266 }
Chris@69 267 }
Chris@69 268
Chris@69 269 /* Mirror on both sides for TDAC */
Chris@69 270 {
Chris@69 271 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
Chris@69 272 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
Chris@69 273 const opus_val16 * OPUS_RESTRICT wp1 = window;
Chris@69 274 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
Chris@69 275
Chris@69 276 for(i = 0; i < overlap/2; i++)
Chris@69 277 {
Chris@69 278 kiss_fft_scalar x1, x2;
Chris@69 279 x1 = *xp1;
Chris@69 280 x2 = *yp1;
Chris@69 281 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
Chris@69 282 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
Chris@69 283 wp1++;
Chris@69 284 wp2--;
Chris@69 285 }
Chris@69 286 }
Chris@69 287 }
Chris@69 288 #endif /* __MDCT_MIPSR1_H__ */