annotate src/opus-1.3/silk/x86/main_sse.h @ 81:7029a4916348

Merge build update
author Chris Cannam
date Thu, 31 Oct 2019 13:36:58 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /* Copyright (c) 2014, Cisco Systems, INC
Chris@69 2 Written by XiangMingZhu WeiZhou MinPeng YanWang
Chris@69 3
Chris@69 4 Redistribution and use in source and binary forms, with or without
Chris@69 5 modification, are permitted provided that the following conditions
Chris@69 6 are met:
Chris@69 7
Chris@69 8 - Redistributions of source code must retain the above copyright
Chris@69 9 notice, this list of conditions and the following disclaimer.
Chris@69 10
Chris@69 11 - Redistributions in binary form must reproduce the above copyright
Chris@69 12 notice, this list of conditions and the following disclaimer in the
Chris@69 13 documentation and/or other materials provided with the distribution.
Chris@69 14
Chris@69 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Chris@69 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Chris@69 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Chris@69 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
Chris@69 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Chris@69 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
Chris@69 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
Chris@69 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
Chris@69 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
Chris@69 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
Chris@69 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@69 26 */
Chris@69 27
Chris@69 28 #ifndef MAIN_SSE_H
Chris@69 29 #define MAIN_SSE_H
Chris@69 30 /* Include guard: the rest of this header is processed at most once per translation unit. */
Chris@69 31 #ifdef HAVE_CONFIG_H
Chris@69 32 #include "config.h"
Chris@69 33 #endif
Chris@69 34 /* All declarations below, down to the matching "# endif" on the second-to-last line, apply only when OPUS_X86_MAY_HAVE_SSE4_1 is defined. */
Chris@69 35 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
Chris@69 36
Chris@69 37 #if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */
Chris@69 38 # define OVERRIDE_silk_VQ_WMat_EC
Chris@69 39 /* Everything from the "#if 0" above to its matching "#endif" (after the dispatch macros) is compiled out. Parameter tags: I = input, O = output. */
Chris@69 40 void silk_VQ_WMat_EC_sse4_1(
Chris@69 41 opus_int8 *ind, /* O index of best codebook vector */
Chris@69 42 opus_int32 *rate_dist_Q14, /* O best weighted quantization error + mu * rate */
Chris@69 43 opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
Chris@69 44 const opus_int16 *in_Q14, /* I input vector to be quantized */
Chris@69 45 const opus_int32 *W_Q18, /* I weighting matrix */
Chris@69 46 const opus_int8 *cb_Q7, /* I codebook */
Chris@69 47 const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
Chris@69 48 const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
Chris@69 49 const opus_int mu_Q9, /* I tradeoff between weighted error and rate */
Chris@69 50 const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
Chris@69 51 opus_int L /* I number of vectors in codebook */
Chris@69 52 );
Chris@69 53 /* Dispatch for silk_VQ_WMat_EC(): a direct call when OPUS_X86_PRESUME_SSE4_1 is defined, a function-pointer table lookup otherwise. */
Chris@69 54 #if defined OPUS_X86_PRESUME_SSE4_1
Chris@69 55 /* Direct form: (arch) is evaluated only to be discarded, then the SSE4.1 routine is called with the remaining arguments. */
Chris@69 56 #define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
Chris@69 57 mu_Q9, max_gain_Q7, L, arch) \
Chris@69 58 ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
Chris@69 59 mu_Q9, max_gain_Q7, L))
Chris@69 60
Chris@69 61 #else
Chris@69 62 /* Table form: OPUS_ARCHMASK + 1 function pointers with the same parameter list as silk_VQ_WMat_EC_sse4_1(); declared only, the definition is not in this header. */
Chris@69 63 extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
Chris@69 64 opus_int8 *ind, /* O index of best codebook vector */
Chris@69 65 opus_int32 *rate_dist_Q14, /* O best weighted quantization error + mu * rate */
Chris@69 66 opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
Chris@69 67 const opus_int16 *in_Q14, /* I input vector to be quantized */
Chris@69 68 const opus_int32 *W_Q18, /* I weighting matrix */
Chris@69 69 const opus_int8 *cb_Q7, /* I codebook */
Chris@69 70 const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
Chris@69 71 const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
Chris@69 72 const opus_int mu_Q9, /* I tradeoff between weighted error and rate */
Chris@69 73 const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
Chris@69 74 opus_int L /* I number of vectors in codebook */
Chris@69 75 );
Chris@69 76 /* The table entry is selected with (arch) & OPUS_ARCHMASK and called with the remaining arguments. */
Chris@69 77 # define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
Chris@69 78 mu_Q9, max_gain_Q7, L, arch) \
Chris@69 79 ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
Chris@69 80 mu_Q9, max_gain_Q7, L))
Chris@69 81
Chris@69 82 #endif /* OPUS_X86_PRESUME_SSE4_1 */
Chris@69 83 #endif /* 0: silk_VQ_WMat_EC override disabled */
Chris@69 84
Chris@69 85 #if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */
Chris@69 86 # define OVERRIDE_silk_NSQ
Chris@69 87 /* This "#if 0" is not closed at the end of the silk_NSQ dispatch: it also encloses the silk_NSQ_del_dec declarations that follow, so both overrides are compiled out. */
Chris@69 88 void silk_NSQ_sse4_1(
Chris@69 89 const silk_encoder_state *psEncC, /* I Encoder State */
Chris@69 90 silk_nsq_state *NSQ, /* I/O NSQ state */
Chris@69 91 SideInfoIndices *psIndices, /* I/O Quantization Indices */
Chris@69 92 const opus_int32 x_Q3[], /* I Prefiltered input signal */
Chris@69 93 opus_int8 pulses[], /* O Quantized pulse signal */
Chris@69 94 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefficients */
Chris@69 95 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefficients */
Chris@69 96 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefficients */
Chris@69 97 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefficients */
Chris@69 98 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
Chris@69 99 const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefficients */
Chris@69 100 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
Chris@69 101 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
Chris@69 102 const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
Chris@69 103 const opus_int LTP_scale_Q14 /* I LTP state scaling */
Chris@69 104 );
Chris@69 105 /* Dispatch for silk_NSQ(): a direct call when OPUS_X86_PRESUME_SSE4_1 is defined, a function-pointer table lookup otherwise. */
Chris@69 106 #if defined OPUS_X86_PRESUME_SSE4_1
Chris@69 107 /* Direct form: (arch) is evaluated only to be discarded, then silk_NSQ_sse4_1() is called with the remaining arguments. */
Chris@69 108 #define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 109 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
Chris@69 110 ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 111 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
Chris@69 112
Chris@69 113 #else
Chris@69 114 /* Table form: OPUS_ARCHMASK + 1 function pointers with the same parameter list as silk_NSQ_sse4_1(); declared only, the definition is not in this header. */
Chris@69 115 extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
Chris@69 116 const silk_encoder_state *psEncC, /* I Encoder State */
Chris@69 117 silk_nsq_state *NSQ, /* I/O NSQ state */
Chris@69 118 SideInfoIndices *psIndices, /* I/O Quantization Indices */
Chris@69 119 const opus_int32 x_Q3[], /* I Prefiltered input signal */
Chris@69 120 opus_int8 pulses[], /* O Quantized pulse signal */
Chris@69 121 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefficients */
Chris@69 122 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefficients */
Chris@69 123 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefficients */
Chris@69 124 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefficients */
Chris@69 125 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
Chris@69 126 const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefficients */
Chris@69 127 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
Chris@69 128 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
Chris@69 129 const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
Chris@69 130 const opus_int LTP_scale_Q14 /* I LTP state scaling */
Chris@69 131 );
Chris@69 132 /* The table entry is selected with (arch) & OPUS_ARCHMASK and called with the remaining arguments. */
Chris@69 133 # define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 134 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
Chris@69 135 ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 136 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
Chris@69 137
Chris@69 138 #endif /* OPUS_X86_PRESUME_SSE4_1 */
Chris@69 139
Chris@69 140 # define OVERRIDE_silk_NSQ_del_dec
Chris@69 141 /* Still inside the "#if 0" opened for the NSQ code above, so this override and the declarations below are currently compiled out. */
Chris@69 142 void silk_NSQ_del_dec_sse4_1(
Chris@69 143 const silk_encoder_state *psEncC, /* I Encoder State */
Chris@69 144 silk_nsq_state *NSQ, /* I/O NSQ state */
Chris@69 145 SideInfoIndices *psIndices, /* I/O Quantization Indices */
Chris@69 146 const opus_int32 x_Q3[], /* I Prefiltered input signal */
Chris@69 147 opus_int8 pulses[], /* O Quantized pulse signal */
Chris@69 148 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefficients */
Chris@69 149 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefficients */
Chris@69 150 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefficients */
Chris@69 151 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefficients */
Chris@69 152 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
Chris@69 153 const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefficients */
Chris@69 154 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
Chris@69 155 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
Chris@69 156 const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
Chris@69 157 const opus_int LTP_scale_Q14 /* I LTP state scaling */
Chris@69 158 );
Chris@69 159 /* Dispatch for silk_NSQ_del_dec(): a direct call when OPUS_X86_PRESUME_SSE4_1 is defined, a function-pointer table lookup otherwise. */
Chris@69 160 #if defined OPUS_X86_PRESUME_SSE4_1
Chris@69 161 /* Direct form: (arch) is evaluated only to be discarded, then silk_NSQ_del_dec_sse4_1() is called with the remaining arguments. */
Chris@69 162 #define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 163 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
Chris@69 164 ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 165 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
Chris@69 166
Chris@69 167 #else
Chris@69 168 /* Table form: OPUS_ARCHMASK + 1 function pointers with the same parameter list as silk_NSQ_del_dec_sse4_1(); declared only, the definition is not in this header. */
Chris@69 169 extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
Chris@69 170 const silk_encoder_state *psEncC, /* I Encoder State */
Chris@69 171 silk_nsq_state *NSQ, /* I/O NSQ state */
Chris@69 172 SideInfoIndices *psIndices, /* I/O Quantization Indices */
Chris@69 173 const opus_int32 x_Q3[], /* I Prefiltered input signal */
Chris@69 174 opus_int8 pulses[], /* O Quantized pulse signal */
Chris@69 175 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefficients */
Chris@69 176 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefficients */
Chris@69 177 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefficients */
Chris@69 178 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefficients */
Chris@69 179 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
Chris@69 180 const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefficients */
Chris@69 181 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
Chris@69 182 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
Chris@69 183 const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
Chris@69 184 const opus_int LTP_scale_Q14 /* I LTP state scaling */
Chris@69 185 );
Chris@69 186 /* The table entry is selected with (arch) & OPUS_ARCHMASK and called with the remaining arguments. */
Chris@69 187 # define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 188 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
Chris@69 189 ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
Chris@69 190 HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
Chris@69 191
Chris@69 192 #endif /* OPUS_X86_PRESUME_SSE4_1 */
Chris@69 193 #endif /* 0: silk_NSQ and silk_NSQ_del_dec overrides disabled */
Chris@69 194 /* Prototype only (no definition in this header). It sits outside the "#if 0" regions above, so it is declared whenever OPUS_X86_MAY_HAVE_SSE4_1 is defined. Tags: I = input, O = output, I/O = both. */
Chris@69 195 void silk_noise_shape_quantizer(
Chris@69 196 silk_nsq_state *NSQ, /* I/O NSQ state */
Chris@69 197 opus_int signalType, /* I Signal type */
Chris@69 198 const opus_int32 x_sc_Q10[], /* I */
Chris@69 199 opus_int8 pulses[], /* O */
Chris@69 200 opus_int16 xq[], /* O */
Chris@69 201 opus_int32 sLTP_Q15[], /* I/O LTP state */
Chris@69 202 const opus_int16 a_Q12[], /* I Short term prediction coefficients */
Chris@69 203 const opus_int16 b_Q14[], /* I Long term prediction coefficients */
Chris@69 204 const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefficients */
Chris@69 205 opus_int lag, /* I Pitch lag */
Chris@69 206 opus_int32 HarmShapeFIRPacked_Q14, /* I */
Chris@69 207 opus_int Tilt_Q14, /* I Spectral tilt */
Chris@69 208 opus_int32 LF_shp_Q14, /* I */
Chris@69 209 opus_int32 Gain_Q16, /* I */
Chris@69 210 opus_int Lambda_Q10, /* I */
Chris@69 211 opus_int offset_Q10, /* I */
Chris@69 212 opus_int length, /* I Input length */
Chris@69 213 opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
Chris@69 214 opus_int predictLPCOrder, /* I Prediction filter order */
Chris@69 215 int arch /* I Architecture */
Chris@69 216 );
Chris@69 217 /* Prototype only (no definition in this header); declared whenever OPUS_X86_MAY_HAVE_SSE4_1 is defined. */
Chris@69 218 /**************************/
Chris@69 219 /* Noise level estimation */
Chris@69 220 /**************************/
Chris@69 221 void silk_VAD_GetNoiseLevels(
Chris@69 222 const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies (VAD_N_BANDS entries per the declarator) */
Chris@69 223 silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
Chris@69 224 );
Chris@69 225
Chris@69 226 # define OVERRIDE_silk_VAD_GetSA_Q8
Chris@69 227 /* Unlike the overrides earlier in this header, this one is not wrapped in "#if 0": it is active whenever OPUS_X86_MAY_HAVE_SSE4_1 is defined. */
Chris@69 228 opus_int silk_VAD_GetSA_Q8_sse4_1(
Chris@69 229 silk_encoder_state *psEnC, /* pointer to (non-const) encoder state */
Chris@69 230 const opus_int16 pIn[] /* I read-only opus_int16 array; its length is not expressed in the prototype */
Chris@69 231 );
Chris@69 232 /* Dispatch for silk_VAD_GetSA_Q8(psEnC, pIn, arch): a direct call when OPUS_X86_PRESUME_SSE4_1 is defined, a function-pointer table lookup otherwise. */
Chris@69 233 #if defined(OPUS_X86_PRESUME_SSE4_1)
Chris@69 234 #define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
Chris@69 235 /* Above: (arch) is evaluated and discarded; the value of the expression is the opus_int returned by silk_VAD_GetSA_Q8_sse4_1(). */
Chris@69 236 #else
Chris@69 237 /* The macro below names SILK_VAD_GETSA_Q8_IMPL before its extern declaration; that is harmless because a macro body is only expanded where the macro is used. */
Chris@69 238 # define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
Chris@69 239 ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
Chris@69 240 /* Table of OPUS_ARCHMASK + 1 function pointers, indexed by (arch) & OPUS_ARCHMASK; declared only, the definition is not in this header. */
Chris@69 241 extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
Chris@69 242 silk_encoder_state *psEnC,
Chris@69 243 const opus_int16 pIn[]);
Chris@69 244
Chris@69 245 #endif /* OPUS_X86_PRESUME_SSE4_1 */
Chris@69 246
Chris@69 247 # endif /* OPUS_X86_MAY_HAVE_SSE4_1 */
Chris@69 248 #endif /* MAIN_SSE_H */