annotate src/opus-1.3/silk/SigProc_FIX.h @ 72:7b5216b54e42

Update exclusion list
author Chris Cannam
date Fri, 25 Jan 2019 13:49:22 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /***********************************************************************
Chris@69 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Chris@69 3 Redistribution and use in source and binary forms, with or without
Chris@69 4 modification, are permitted provided that the following conditions
Chris@69 5 are met:
Chris@69 6 - Redistributions of source code must retain the above copyright notice,
Chris@69 7 this list of conditions and the following disclaimer.
Chris@69 8 - Redistributions in binary form must reproduce the above copyright
Chris@69 9 notice, this list of conditions and the following disclaimer in the
Chris@69 10 documentation and/or other materials provided with the distribution.
Chris@69 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
Chris@69 12 names of specific contributors, may be used to endorse or promote
Chris@69 13 products derived from this software without specific prior written
Chris@69 14 permission.
Chris@69 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Chris@69 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Chris@69 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Chris@69 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Chris@69 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Chris@69 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Chris@69 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Chris@69 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Chris@69 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Chris@69 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Chris@69 25 POSSIBILITY OF SUCH DAMAGE.
Chris@69 26 ***********************************************************************/
Chris@69 27
Chris@69 28 #ifndef SILK_SIGPROC_FIX_H
Chris@69 29 #define SILK_SIGPROC_FIX_H
Chris@69 30
Chris@69 31 #ifdef __cplusplus
Chris@69 32 extern "C"
Chris@69 33 {
Chris@69 34 #endif
Chris@69 35
Chris@69 36 /*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */
Chris@69 37
Chris@69 38 #define SILK_MAX_ORDER_LPC 24 /* max order of the LPC analysis in schur() and k2a() */
Chris@69 39
Chris@69 40 #include <string.h> /* for memset(), memcpy(), memmove() */
Chris@69 41 #include "typedef.h"
Chris@69 42 #include "resampler_structs.h"
Chris@69 43 #include "macros.h"
Chris@69 44 #include "cpu_support.h"
Chris@69 45
Chris@69 46 #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
Chris@69 47 #include "x86/SigProc_FIX_sse.h"
Chris@69 48 #endif
Chris@69 49
Chris@69 50 #if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
Chris@69 51 #include "arm/biquad_alt_arm.h"
Chris@69 52 #include "arm/LPC_inv_pred_gain_arm.h"
Chris@69 53 #endif
Chris@69 54
Chris@69 55 /********************************************************************/
Chris@69 56 /* SIGNAL PROCESSING FUNCTIONS */
Chris@69 57 /********************************************************************/
Chris@69 58
Chris@69 59 /*!
Chris@69 60 * Initialize/reset the resampler state for a given pair of input/output sampling rates
Chris@69 61 */
Chris@69 62 opus_int silk_resampler_init(
Chris@69 63 silk_resampler_state_struct *S, /* I/O Resampler state */
Chris@69 64 opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */
Chris@69 65 opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */
Chris@69 66 opus_int forEnc /* I If 1: encoder; if 0: decoder */
Chris@69 67 );
Chris@69 68
Chris@69 69 /*!
Chris@69 70 * Resampler: convert from one sampling rate to another
Chris@69 71 */
Chris@69 72 opus_int silk_resampler(
Chris@69 73 silk_resampler_state_struct *S, /* I/O Resampler state */
Chris@69 74 opus_int16 out[], /* O Output signal */
Chris@69 75 const opus_int16 in[], /* I Input signal */
Chris@69 76 opus_int32 inLen /* I Number of input samples */
Chris@69 77 );
Chris@69 78
Chris@69 79 /*!
Chris@69 80 * Downsample 2x, mediocre quality
Chris@69 81 */
Chris@69 82 void silk_resampler_down2(
Chris@69 83 opus_int32 *S, /* I/O State vector [ 2 ] */
Chris@69 84 opus_int16 *out, /* O Output signal [ floor(inLen/2) ] */
Chris@69 85 const opus_int16 *in, /* I Input signal [ inLen ] */
Chris@69 86 opus_int32 inLen /* I Number of input samples */
Chris@69 87 );
Chris@69 88
Chris@69 89 /*!
Chris@69 90 * Downsample by a factor 2/3, low quality
Chris@69 91 */
Chris@69 92 void silk_resampler_down2_3(
Chris@69 93 opus_int32 *S, /* I/O State vector [ 6 ] */
Chris@69 94 opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */
Chris@69 95 const opus_int16 *in, /* I Input signal [ inLen ] */
Chris@69 96 opus_int32 inLen /* I Number of input samples */
Chris@69 97 );
Chris@69 98
Chris@69 99 /*!
Chris@69 100 * second order ARMA filter;
Chris@69 101 * slower than biquad() but uses more precise coefficients
Chris@69 102 * can handle (slowly) varying coefficients
Chris@69 103 */
Chris@69 104 void silk_biquad_alt_stride1(
Chris@69 105 const opus_int16 *in, /* I input signal */
Chris@69 106 const opus_int32 *B_Q28, /* I MA coefficients [3] */
Chris@69 107 const opus_int32 *A_Q28, /* I AR coefficients [2] */
Chris@69 108 opus_int32 *S, /* I/O State vector [2] */
Chris@69 109 opus_int16 *out, /* O output signal */
Chris@69 110 const opus_int32 len /* I signal length (must be even) */
Chris@69 111 );
Chris@69 112
Chris@69 113 void silk_biquad_alt_stride2_c(
Chris@69 114 const opus_int16 *in, /* I input signal */
Chris@69 115 const opus_int32 *B_Q28, /* I MA coefficients [3] */
Chris@69 116 const opus_int32 *A_Q28, /* I AR coefficients [2] */
Chris@69 117 opus_int32 *S, /* I/O State vector [4] */
Chris@69 118 opus_int16 *out, /* O output signal */
Chris@69 119 const opus_int32 len /* I signal length (must be even) */
Chris@69 120 );
Chris@69 121
Chris@69 122 /* Variable order MA prediction error filter. */
Chris@69 123 void silk_LPC_analysis_filter(
Chris@69 124 opus_int16 *out, /* O Output signal */
Chris@69 125 const opus_int16 *in, /* I Input signal */
Chris@69 126 const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
Chris@69 127 const opus_int32 len, /* I Signal length */
Chris@69 128 const opus_int32 d, /* I Filter order */
Chris@69 129 int arch /* I Run-time architecture */
Chris@69 130 );
Chris@69 131
Chris@69 132 /* Chirp (bandwidth expand) LP AR filter */
Chris@69 133 void silk_bwexpander(
Chris@69 134 opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */
Chris@69 135 const opus_int d, /* I Length of ar */
Chris@69 136 opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */
Chris@69 137 );
Chris@69 138
Chris@69 139 /* Chirp (bandwidth expand) LP AR filter */
Chris@69 140 void silk_bwexpander_32(
Chris@69 141 opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */
Chris@69 142 const opus_int d, /* I Length of ar */
Chris@69 143 opus_int32 chirp_Q16 /* I Chirp factor in Q16 */
Chris@69 144 );
Chris@69 145
Chris@69 146 /* Compute inverse of LPC prediction gain, and */
Chris@69 147 /* test if LPC coefficients are stable (all poles within unit circle) */
Chris@69 148 opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */
Chris@69 149 const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
Chris@69 150 const opus_int order /* I Prediction order */
Chris@69 151 );
Chris@69 152
Chris@69 153 /* Split signal in two decimated bands using first-order allpass filters */
Chris@69 154 void silk_ana_filt_bank_1(
Chris@69 155 const opus_int16 *in, /* I Input signal [N] */
Chris@69 156 opus_int32 *S, /* I/O State vector [2] */
Chris@69 157 opus_int16 *outL, /* O Low band [N/2] */
Chris@69 158 opus_int16 *outH, /* O High band [N/2] */
Chris@69 159 const opus_int32 N /* I Number of input samples */
Chris@69 160 );
Chris@69 161
Chris@69 162 #if !defined(OVERRIDE_silk_biquad_alt_stride2)
Chris@69 163 #define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len))
Chris@69 164 #endif
Chris@69 165
Chris@69 166 #if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
Chris@69 167 #define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order))
Chris@69 168 #endif
Chris@69 169
Chris@69 170 /********************************************************************/
Chris@69 171 /* SCALAR FUNCTIONS */
Chris@69 172 /********************************************************************/
Chris@69 173
Chris@69 174 /* Approximation of 128 * log2() (exact inverse of approx 2^() below) */
Chris@69 175 /* Convert input to a log scale */
Chris@69 176 opus_int32 silk_lin2log(
Chris@69 177 const opus_int32 inLin /* I input in linear scale */
Chris@69 178 );
Chris@69 179
Chris@69 180 /* Approximation of a sigmoid function */
Chris@69 181 opus_int silk_sigm_Q15(
Chris@69 182 opus_int in_Q5 /* I */
Chris@69 183 );
Chris@69 184
Chris@69 185 /* Approximation of 2^() (exact inverse of approx log2() above) */
Chris@69 186 /* Convert input to a linear scale */
Chris@69 187 opus_int32 silk_log2lin(
Chris@69 188 const opus_int32 inLog_Q7 /* I input on log scale */
Chris@69 189 );
Chris@69 190
Chris@69 191 /* Compute number of bits to right shift the sum of squares of a vector */
Chris@69 192 /* of int16s to make it fit in an int32 */
Chris@69 193 void silk_sum_sqr_shift(
Chris@69 194 opus_int32 *energy, /* O Energy of x, after shifting to the right */
Chris@69 195 opus_int *shift, /* O Number of bits right shift applied to energy */
Chris@69 196 const opus_int16 *x, /* I Input vector */
Chris@69 197 opus_int len /* I Length of input vector */
Chris@69 198 );
Chris@69 199
Chris@69 200 /* Calculates the reflection coefficients from the correlation sequence */
Chris@69 201 /* Faster than schur64(), but much less accurate. */
Chris@69 202 /* uses SMLAWB(), requiring armv5E and higher. */
Chris@69 203 opus_int32 silk_schur( /* O Returns residual energy */
Chris@69 204 opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */
Chris@69 205 const opus_int32 *c, /* I correlations [order+1] */
Chris@69 206 const opus_int32 order /* I prediction order */
Chris@69 207 );
Chris@69 208
Chris@69 209 /* Calculates the reflection coefficients from the correlation sequence */
Chris@69 210 /* Slower than schur(), but more accurate. */
Chris@69 211 /* Uses SMULL(), available on armv4 */
Chris@69 212 opus_int32 silk_schur64( /* O returns residual energy */
Chris@69 213 opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */
Chris@69 214 const opus_int32 c[], /* I Correlations [order+1] */
Chris@69 215 opus_int32 order /* I Prediction order */
Chris@69 216 );
Chris@69 217
Chris@69 218 /* Step up function, converts reflection coefficients to prediction coefficients */
Chris@69 219 void silk_k2a(
Chris@69 220 opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
Chris@69 221 const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */
Chris@69 222 const opus_int32 order /* I Prediction order */
Chris@69 223 );
Chris@69 224
Chris@69 225 /* Step up function, converts reflection coefficients to prediction coefficients */
Chris@69 226 void silk_k2a_Q16(
Chris@69 227 opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
Chris@69 228 const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */
Chris@69 229 const opus_int32 order /* I Prediction order */
Chris@69 230 );
Chris@69 231
Chris@69 232 /* Apply sine window to signal vector. */
Chris@69 233 /* Window types: */
Chris@69 234 /* 1 -> sine window from 0 to pi/2 */
Chris@69 235 /* 2 -> sine window from pi/2 to pi */
Chris@69 236 /* every other sample of window is linearly interpolated, for speed */
Chris@69 237 void silk_apply_sine_window(
Chris@69 238 opus_int16 px_win[], /* O Pointer to windowed signal */
Chris@69 239 const opus_int16 px[], /* I Pointer to input signal */
Chris@69 240 const opus_int win_type, /* I Selects a window type */
Chris@69 241 const opus_int length /* I Window length, multiple of 4 */
Chris@69 242 );
Chris@69 243
Chris@69 244 /* Compute autocorrelation */
Chris@69 245 void silk_autocorr(
Chris@69 246 opus_int32 *results, /* O Result (length correlationCount) */
Chris@69 247 opus_int *scale, /* O Scaling of the correlation vector */
Chris@69 248 const opus_int16 *inputData, /* I Input data to correlate */
Chris@69 249 const opus_int inputDataSize, /* I Length of input */
Chris@69 250 const opus_int correlationCount, /* I Number of correlation taps to compute */
Chris@69 251 int arch /* I Run-time architecture */
Chris@69 252 );
Chris@69 253
Chris@69 254 void silk_decode_pitch(
Chris@69 255 opus_int16 lagIndex, /* I Lag index */
Chris@69 256 opus_int8 contourIndex, /* I Pitch contour index (passed by value, so input only) */
Chris@69 257 opus_int pitch_lags[], /* O 4 pitch values */
Chris@69 258 const opus_int Fs_kHz, /* I sampling frequency (kHz) */
Chris@69 259 const opus_int nb_subfr /* I number of sub frames */
Chris@69 260 );
Chris@69 261
Chris@69 262 opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
Chris@69 263 const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
Chris@69 264 opus_int *pitch_out, /* O 4 pitch lag values */
Chris@69 265 opus_int16 *lagIndex, /* O Lag Index */
Chris@69 266 opus_int8 *contourIndex, /* O Pitch contour Index */
Chris@69 267 opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
Chris@69 268 opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */
Chris@69 269 const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
Chris@69 270 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
Chris@69 271 const opus_int Fs_kHz, /* I Sample frequency (kHz) */
Chris@69 272 const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
Chris@69 273 const opus_int nb_subfr, /* I number of 5 ms subframes */
Chris@69 274 int arch /* I Run-time architecture */
Chris@69 275 );
Chris@69 276
Chris@69 277 /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */
Chris@69 278 /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
Chris@69 279 void silk_A2NLSF(
Chris@69 280 opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
Chris@69 281 opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */
Chris@69 282 const opus_int d /* I Filter order (must be even) */
Chris@69 283 );
Chris@69 284
Chris@69 285 /* compute whitening filter coefficients from normalized line spectral frequencies */
Chris@69 286 void silk_NLSF2A(
Chris@69 287 opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
Chris@69 288 const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
Chris@69 289 const opus_int d, /* I filter order (should be even) */
Chris@69 290 int arch /* I Run-time architecture */
Chris@69 291 );
Chris@69 292
Chris@69 293 /* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
Chris@69 294 void silk_LPC_fit(
Chris@69 295 opus_int16 *a_QOUT, /* O Output signal */
Chris@69 296 opus_int32 *a_QIN, /* I/O Input signal */
Chris@69 297 const opus_int QOUT, /* I Output Q domain */
Chris@69 298 const opus_int QIN, /* I Input Q domain */
Chris@69 299 const opus_int d /* I Filter order */
Chris@69 300 );
Chris@69 301
Chris@69 302 void silk_insertion_sort_increasing(
Chris@69 303 opus_int32 *a, /* I/O Unsorted / Sorted vector */
Chris@69 304 opus_int *idx, /* O Index vector for the sorted elements */
Chris@69 305 const opus_int L, /* I Vector length */
Chris@69 306 const opus_int K /* I Number of correctly sorted positions */
Chris@69 307 );
Chris@69 308
Chris@69 309 void silk_insertion_sort_decreasing_int16(
Chris@69 310 opus_int16 *a, /* I/O Unsorted / Sorted vector */
Chris@69 311 opus_int *idx, /* O Index vector for the sorted elements */
Chris@69 312 const opus_int L, /* I Vector length */
Chris@69 313 const opus_int K /* I Number of correctly sorted positions */
Chris@69 314 );
Chris@69 315
Chris@69 316 void silk_insertion_sort_increasing_all_values_int16(
Chris@69 317 opus_int16 *a, /* I/O Unsorted / Sorted vector */
Chris@69 318 const opus_int L /* I Vector length */
Chris@69 319 );
Chris@69 320
Chris@69 321 /* NLSF stabilizer, for a single input data vector */
Chris@69 322 void silk_NLSF_stabilize(
Chris@69 323 opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */
Chris@69 324 const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */
Chris@69 325 const opus_int L /* I Number of NLSF parameters in the input vector */
Chris@69 326 );
Chris@69 327
Chris@69 328 /* Laroia low complexity NLSF weights */
Chris@69 329 void silk_NLSF_VQ_weights_laroia(
Chris@69 330 opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */
Chris@69 331 const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */
Chris@69 332 const opus_int D /* I Input vector dimension (even) */
Chris@69 333 );
Chris@69 334
Chris@69 335 /* Compute reflection coefficients from input signal */
Chris@69 336 void silk_burg_modified_c(
Chris@69 337 opus_int32 *res_nrg, /* O Residual energy */
Chris@69 338 opus_int *res_nrg_Q, /* O Residual energy Q value */
Chris@69 339 opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
Chris@69 340 const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */
Chris@69 341 const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
Chris@69 342 const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
Chris@69 343 const opus_int nb_subfr, /* I Number of subframes stacked in x */
Chris@69 344 const opus_int D, /* I Order */
Chris@69 345 int arch /* I Run-time architecture */
Chris@69 346 );
Chris@69 347
Chris@69 348 /* Copy and multiply a vector by a constant */
Chris@69 349 void silk_scale_copy_vector16(
Chris@69 350 opus_int16 *data_out,
Chris@69 351 const opus_int16 *data_in,
Chris@69 352 opus_int32 gain_Q16, /* I Gain in Q16 */
Chris@69 353 const opus_int dataSize /* I Length */
Chris@69 354 );
Chris@69 355
Chris@69 356 /* Some of the LTP-related functions require Q26 to work. */
Chris@69 357 void silk_scale_vector32_Q26_lshift_18(
Chris@69 358 opus_int32 *data1, /* I/O Q0/Q18 */
Chris@69 359 opus_int32 gain_Q26, /* I Q26 */
Chris@69 360 opus_int dataSize /* I length */
Chris@69 361 );
Chris@69 362
Chris@69 363 /********************************************************************/
Chris@69 364 /* INLINE ARM MATH */
Chris@69 365 /********************************************************************/
Chris@69 366
Chris@69 367 /* return sum( inVec1[i] * inVec2[i] ) */
Chris@69 368
Chris@69 369 opus_int32 silk_inner_prod_aligned(
Chris@69 370 const opus_int16 *const inVec1, /* I input vector 1 */
Chris@69 371 const opus_int16 *const inVec2, /* I input vector 2 */
Chris@69 372 const opus_int len, /* I vector lengths */
Chris@69 373 int arch /* I Run-time architecture */
Chris@69 374 );
Chris@69 375
Chris@69 376
Chris@69 377 opus_int32 silk_inner_prod_aligned_scale(
Chris@69 378 const opus_int16 *const inVec1, /* I input vector 1 */
Chris@69 379 const opus_int16 *const inVec2, /* I input vector 2 */
Chris@69 380 const opus_int scale, /* I number of bits to shift */
Chris@69 381 const opus_int len /* I vector lengths */
Chris@69 382 );
Chris@69 383
Chris@69 384 opus_int64 silk_inner_prod16_aligned_64_c(
Chris@69 385 const opus_int16 *inVec1, /* I input vector 1 */
Chris@69 386 const opus_int16 *inVec2, /* I input vector 2 */
Chris@69 387 const opus_int len /* I vector lengths */
Chris@69 388 );
Chris@69 389
Chris@69 390 /********************************************************************/
Chris@69 391 /* MACROS */
Chris@69 392 /********************************************************************/
Chris@69 393
Chris@69 394 /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
Chris@69 395 left. Output is 32bit int.
Chris@69 396 Note: contemporary compilers recognize the C expression below and
Chris@69 397 compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
Chris@69 398 static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
Chris@69 399 {
Chris@69 400 opus_uint32 x = (opus_uint32) a32;
Chris@69 401 opus_uint32 r = (opus_uint32) rot;
Chris@69 402 opus_uint32 m = (opus_uint32) -rot;
Chris@69 403 if( rot == 0 ) {
Chris@69 404 return a32;
Chris@69 405 } else if( rot < 0 ) {
Chris@69 406 return (opus_int32) ((x << m) | (x >> (32 - m)));
Chris@69 407 } else {
Chris@69 408 return (opus_int32) ((x << (32 - r)) | (x >> r));
Chris@69 409 }
Chris@69 410 }
Chris@69 411
Chris@69 412 /* Allocate opus_int16 aligned to 4-byte memory address */
Chris@69 413 #if EMBEDDED_ARM
Chris@69 414 #define silk_DWORD_ALIGN __attribute__((aligned(4)))
Chris@69 415 #else
Chris@69 416 #define silk_DWORD_ALIGN
Chris@69 417 #endif
Chris@69 418
Chris@69 419 /* Useful Macros that can be adjusted to other platforms */
Chris@69 420 #define silk_memcpy(dest, src, size) memcpy((dest), (src), (size))
Chris@69 421 #define silk_memset(dest, src, size) memset((dest), (src), (size))
Chris@69 422 #define silk_memmove(dest, src, size) memmove((dest), (src), (size))
Chris@69 423
Chris@69 424 /* Fixed point macros */
Chris@69 425
Chris@69 426 /* (a32 * b32) output have to be 32bit int */
Chris@69 427 #define silk_MUL(a32, b32) ((a32) * (b32))
Chris@69 428
Chris@69 429 /* (a32 * b32) output have to be 32bit uint */
Chris@69 430 #define silk_MUL_uint(a32, b32) silk_MUL(a32, b32)
Chris@69 431
Chris@69 432 /* a32 + (b32 * c32) output have to be 32bit int */
Chris@69 433 #define silk_MLA(a32, b32, c32) silk_ADD32((a32),((b32) * (c32)))
Chris@69 434
Chris@69 435 /* a32 + (b32 * c32) output have to be 32bit uint */
Chris@69 436 #define silk_MLA_uint(a32, b32, c32) silk_MLA(a32, b32, c32)
Chris@69 437
Chris@69 438 /* ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */
Chris@69 439 #define silk_SMULTT(a32, b32) (((a32) >> 16) * ((b32) >> 16))
Chris@69 440
Chris@69 441 /* a32 + ((b32 >> 16) * (c32 >> 16)) output have to be 32bit int */
Chris@69 442 #define silk_SMLATT(a32, b32, c32) silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
Chris@69 443
Chris@69 444 #define silk_SMLALBB(a64, b16, c16) silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
Chris@69 445
Chris@69 446 /* (a32 * b32) */
Chris@69 447 #define silk_SMULL(a32, b32) ((opus_int64)(a32) * /*(opus_int64)*/(b32))
Chris@69 448
Chris@69 449 /* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
Chris@69 450 (just standard two's complement implementation-specific behaviour) */
Chris@69 451 #define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
Chris@69 452 /* Subtracts two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
Chris@69 453 (just standard two's complement implementation-specific behaviour) */
Chris@69 454 #define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
Chris@69 455
Chris@69 456 /* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
Chris@69 457 #define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
Chris@69 458 #define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
Chris@69 459
Chris@69 460 #define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16)))
Chris@69 461 #define silk_DIV32(a32, b32) ((opus_int32)((a32) / (b32)))
Chris@69 462
Chris@69 463 /* These macros enable checking for overflow in silk_API_Debug.h */
Chris@69 464 #define silk_ADD16(a, b) ((a) + (b))
Chris@69 465 #define silk_ADD32(a, b) ((a) + (b))
Chris@69 466 #define silk_ADD64(a, b) ((a) + (b))
Chris@69 467
Chris@69 468 #define silk_SUB16(a, b) ((a) - (b))
Chris@69 469 #define silk_SUB32(a, b) ((a) - (b))
Chris@69 470 #define silk_SUB64(a, b) ((a) - (b))
Chris@69 471
Chris@69 472 #define silk_SAT8(a) ((a) > silk_int8_MAX ? silk_int8_MAX : \
Chris@69 473 ((a) < silk_int8_MIN ? silk_int8_MIN : (a)))
Chris@69 474 #define silk_SAT16(a) ((a) > silk_int16_MAX ? silk_int16_MAX : \
Chris@69 475 ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
Chris@69 476 #define silk_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : \
Chris@69 477 ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
Chris@69 478
Chris@69 479 #define silk_CHECK_FIT8(a) (a)
Chris@69 480 #define silk_CHECK_FIT16(a) (a)
Chris@69 481 #define silk_CHECK_FIT32(a) (a)
Chris@69 482
Chris@69 483 #define silk_ADD_SAT16(a, b) (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) )
Chris@69 484 #define silk_ADD_SAT64(a, b) ((((a) + (b)) & 0x8000000000000000LL) == 0 ? \
Chris@69 485 ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
Chris@69 486 ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )
Chris@69 487
Chris@69 488 #define silk_SUB_SAT16(a, b) (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) )
Chris@69 489 #define silk_SUB_SAT64(a, b) ((((a)-(b)) & 0x8000000000000000LL) == 0 ? \
Chris@69 490 (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
Chris@69 491 ((((a)^0x8000000000000000LL) & (b) & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
Chris@69 492
Chris@69 493 /* Saturation for positive input values */
Chris@69 494 #define silk_POS_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
Chris@69 495
Chris@69 496 /* Add with saturation for positive input values */
Chris@69 497 #define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
Chris@69 498 #define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
Chris@69 499 #define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
Chris@69 500
Chris@69 501 #define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
Chris@69 502 #define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
Chris@69 503 #define silk_LSHIFT32(a, shift) ((opus_int32)((opus_uint32)(a)<<(shift))) /* shift >= 0, shift < 32 */
Chris@69 504 #define silk_LSHIFT64(a, shift) ((opus_int64)((opus_uint64)(a)<<(shift))) /* shift >= 0, shift < 64 */
Chris@69 505 #define silk_LSHIFT(a, shift) silk_LSHIFT32(a, shift) /* shift >= 0, shift < 32 */
Chris@69 506
Chris@69 507 #define silk_RSHIFT8(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 8 */
Chris@69 508 #define silk_RSHIFT16(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 16 */
Chris@69 509 #define silk_RSHIFT32(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 32 */
Chris@69 510 #define silk_RSHIFT64(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 64 */
Chris@69 511 #define silk_RSHIFT(a, shift) silk_RSHIFT32(a, shift) /* shift >= 0, shift < 32 */
Chris@69 512
Chris@69 513 /* saturates before shifting */
Chris@69 514 #define silk_LSHIFT_SAT32(a, shift) (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
Chris@69 515 silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
Chris@69 516
Chris@69 517 #define silk_LSHIFT_ovflw(a, shift) ((opus_int32)((opus_uint32)(a) << (shift))) /* shift >= 0, allowed to overflow */
Chris@69 518 #define silk_LSHIFT_uint(a, shift) ((a) << (shift)) /* shift >= 0 */
Chris@69 519 #define silk_RSHIFT_uint(a, shift) ((a) >> (shift)) /* shift >= 0 */
Chris@69 520
Chris@69 521 #define silk_ADD_LSHIFT(a, b, shift) ((a) + silk_LSHIFT((b), (shift))) /* shift >= 0 */
Chris@69 522 #define silk_ADD_LSHIFT32(a, b, shift) silk_ADD32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 523 #define silk_ADD_LSHIFT_uint(a, b, shift) ((a) + silk_LSHIFT_uint((b), (shift))) /* shift >= 0 */
Chris@69 524 #define silk_ADD_RSHIFT(a, b, shift) ((a) + silk_RSHIFT((b), (shift))) /* shift >= 0 */
Chris@69 525 #define silk_ADD_RSHIFT32(a, b, shift) silk_ADD32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 526 #define silk_ADD_RSHIFT_uint(a, b, shift) ((a) + silk_RSHIFT_uint((b), (shift))) /* shift >= 0 */
Chris@69 527 #define silk_SUB_LSHIFT32(a, b, shift) silk_SUB32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 528 #define silk_SUB_RSHIFT32(a, b, shift) silk_SUB32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 529
Chris@69 530 /* Requires that shift > 0 */
Chris@69 531 #define silk_RSHIFT_ROUND(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
Chris@69 532 #define silk_RSHIFT_ROUND64(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
Chris@69 533
Chris@69 534 /* Number of rightshift required to fit the multiplication */
Chris@69 535 #define silk_NSHIFT_MUL_32_32(a, b) ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
Chris@69 536 #define silk_NSHIFT_MUL_16_16(a, b) ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
Chris@69 537
Chris@69 538
Chris@69 539 #define silk_min(a, b) (((a) < (b)) ? (a) : (b))
Chris@69 540 #define silk_max(a, b) (((a) > (b)) ? (a) : (b))
Chris@69 541
Chris@69 542 /* Macro to convert floating-point constants to fixed-point */
Chris@69 543 #define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
Chris@69 544
Chris@69 545 /* silk_min() versions with typecast in the function call */
Chris@69 546 static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
Chris@69 547 {
Chris@69 548 return (((a) < (b)) ? (a) : (b));
Chris@69 549 }
Chris@69 550 static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
Chris@69 551 {
Chris@69 552 return (((a) < (b)) ? (a) : (b));
Chris@69 553 }
Chris@69 554 static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
Chris@69 555 {
Chris@69 556 return (((a) < (b)) ? (a) : (b));
Chris@69 557 }
Chris@69 558 static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
Chris@69 559 {
Chris@69 560 return (((a) < (b)) ? (a) : (b));
Chris@69 561 }
Chris@69 562
Chris@69 563 /* silk_max() versions with typecast in the function call */
Chris@69 564 static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
Chris@69 565 {
Chris@69 566 return (((a) > (b)) ? (a) : (b));
Chris@69 567 }
Chris@69 568 static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
Chris@69 569 {
Chris@69 570 return (((a) > (b)) ? (a) : (b));
Chris@69 571 }
Chris@69 572 static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
Chris@69 573 {
Chris@69 574 return (((a) > (b)) ? (a) : (b));
Chris@69 575 }
Chris@69 576 static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
Chris@69 577 {
Chris@69 578 return (((a) > (b)) ? (a) : (b));
Chris@69 579 }
Chris@69 580
Chris@69 581 #define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
Chris@69 582 : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
Chris@69 583
Chris@69 584 #define silk_LIMIT_int silk_LIMIT
Chris@69 585 #define silk_LIMIT_16 silk_LIMIT
Chris@69 586 #define silk_LIMIT_32 silk_LIMIT
Chris@69 587
Chris@69 588 #define silk_abs(a) (((a) > 0) ? (a) : -(a)) /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
Chris@69 589 #define silk_abs_int(a) (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
Chris@69 590 #define silk_abs_int32(a) (((a) ^ ((a) >> 31)) - ((a) >> 31))
Chris@69 591 #define silk_abs_int64(a) (((a) > 0) ? (a) : -(a))
Chris@69 592
Chris@69 593 #define silk_sign(a) ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))
Chris@69 594
Chris@69 595 /* PSEUDO-RANDOM GENERATOR */
Chris@69 596 /* Make sure to store the result as the seed for the next call (also in between */
Chris@69 597 /* frames), otherwise result won't be random at all. When only using some of the */
Chris@69 598 /* bits, take the most significant bits by right-shifting. */
Chris@69 599 #define RAND_MULTIPLIER 196314165
Chris@69 600 #define RAND_INCREMENT 907633515
Chris@69 601 #define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
Chris@69 602
Chris@69 603 /* Add some multiplication functions that can be easily mapped to ARM. */
Chris@69 604
Chris@69 605 /* silk_SMMUL: Signed top word multiply.
Chris@69 606 ARMv6 2 instruction cycles.
Chris@69 607 ARMv3M+ 3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
Chris@69 608 /*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
Chris@69 609 /* the following seems faster on x86 */
Chris@69 610 #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
Chris@69 611
Chris@69 612 #if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
Chris@69 613 #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
Chris@69 614 ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
Chris@69 615
Chris@69 616 #define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
Chris@69 617 ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
Chris@69 618 #endif
Chris@69 619
Chris@69 620 #include "Inlines.h"
Chris@69 621 #include "MacroCount.h"
Chris@69 622 #include "MacroDebug.h"
Chris@69 623
Chris@69 624 #ifdef OPUS_ARM_INLINE_ASM
Chris@69 625 #include "arm/SigProc_FIX_armv4.h"
Chris@69 626 #endif
Chris@69 627
Chris@69 628 #ifdef OPUS_ARM_INLINE_EDSP
Chris@69 629 #include "arm/SigProc_FIX_armv5e.h"
Chris@69 630 #endif
Chris@69 631
Chris@69 632 #if defined(MIPSr1_ASM)
Chris@69 633 #include "mips/sigproc_fix_mipsr1.h"
Chris@69 634 #endif
Chris@69 635
Chris@69 636
Chris@69 637 #ifdef __cplusplus
Chris@69 638 }
Chris@69 639 #endif
Chris@69 640
Chris@69 641 #endif /* SILK_SIGPROC_FIX_H */