annotate src/opus-1.3/silk/SigProc_FIX.h @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /***********************************************************************
Chris@69 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Chris@69 3 Redistribution and use in source and binary forms, with or without
Chris@69 4 modification, are permitted provided that the following conditions
Chris@69 5 are met:
Chris@69 6 - Redistributions of source code must retain the above copyright notice,
Chris@69 7 this list of conditions and the following disclaimer.
Chris@69 8 - Redistributions in binary form must reproduce the above copyright
Chris@69 9 notice, this list of conditions and the following disclaimer in the
Chris@69 10 documentation and/or other materials provided with the distribution.
Chris@69 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
Chris@69 12 names of specific contributors, may be used to endorse or promote
Chris@69 13 products derived from this software without specific prior written
Chris@69 14 permission.
Chris@69 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Chris@69 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Chris@69 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Chris@69 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Chris@69 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Chris@69 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Chris@69 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Chris@69 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Chris@69 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Chris@69 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Chris@69 25 POSSIBILITY OF SUCH DAMAGE.
Chris@69 26 ***********************************************************************/
Chris@69 27
Chris@69 28 #ifndef SILK_SIGPROC_FIX_H
Chris@69 29 #define SILK_SIGPROC_FIX_H
Chris@69 30
Chris@69 31 #ifdef __cplusplus
Chris@69 32 extern "C"
Chris@69 33 {
Chris@69 34 #endif
Chris@69 35
Chris@69 36 /*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */
Chris@69 37
Chris@69 38 #define SILK_MAX_ORDER_LPC 24 /* max order of the LPC analysis in schur() and k2a() */
Chris@69 39
Chris@69 40 #include <string.h> /* for memset(), memcpy(), memmove() */
Chris@69 41 #include "typedef.h"
Chris@69 42 #include "resampler_structs.h"
Chris@69 43 #include "macros.h"
Chris@69 44 #include "cpu_support.h"
Chris@69 45
Chris@69 46 #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
Chris@69 47 #include "x86/SigProc_FIX_sse.h"
Chris@69 48 #endif
Chris@69 49
Chris@69 50 #if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
Chris@69 51 #include "arm/biquad_alt_arm.h"
Chris@69 52 #include "arm/LPC_inv_pred_gain_arm.h"
Chris@69 53 #endif
Chris@69 54
Chris@69 55 /********************************************************************/
Chris@69 56 /* SIGNAL PROCESSING FUNCTIONS */
Chris@69 57 /********************************************************************/
Chris@69 58
Chris@69 59 /*!
Chris@69 60 * Initialize/reset the resampler state for a given pair of input/output sampling rates
Chris@69 61 */
Chris@69 62 opus_int silk_resampler_init(
Chris@69 63 silk_resampler_state_struct *S, /* I/O Resampler state */
Chris@69 64 opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */
Chris@69 65 opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */
Chris@69 66 opus_int forEnc /* I If 1: encoder; if 0: decoder */
Chris@69 67 );
Chris@69 68
Chris@69 69 /*!
Chris@69 70 * Resampler: convert from one sampling rate to another
Chris@69 71 */
Chris@69 72 opus_int silk_resampler(
Chris@69 73 silk_resampler_state_struct *S, /* I/O Resampler state */
Chris@69 74 opus_int16 out[], /* O Output signal */
Chris@69 75 const opus_int16 in[], /* I Input signal */
Chris@69 76 opus_int32 inLen /* I Number of input samples */
Chris@69 77 );
Chris@69 78
Chris@69 79 /*!
Chris@69 80 * Downsample 2x, mediocre quality
Chris@69 81 */
Chris@69 82 void silk_resampler_down2(
Chris@69 83 opus_int32 *S, /* I/O State vector [ 2 ] */
Chris@69 84 opus_int16 *out, /* O Output signal [ floor(inLen/2) ] */
Chris@69 85 const opus_int16 *in, /* I Input signal [ inLen ] */
Chris@69 86 opus_int32 inLen /* I Number of input samples */
Chris@69 87 );
Chris@69 88
Chris@69 89 /*!
Chris@69 90 * Downsample by a factor 2/3, low quality
Chris@69 91 */
Chris@69 92 void silk_resampler_down2_3(
Chris@69 93 opus_int32 *S, /* I/O State vector [ 6 ] */
Chris@69 94 opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */
Chris@69 95 const opus_int16 *in, /* I Input signal [ inLen ] */
Chris@69 96 opus_int32 inLen /* I Number of input samples */
Chris@69 97 );
Chris@69 98
Chris@69 99 /*!
Chris@69 100 * second order ARMA filter;
Chris@69 101 * slower than biquad() but uses more precise coefficients
Chris@69 102 * can handle (slowly) varying coefficients
Chris@69 103 */
Chris@69 104 void silk_biquad_alt_stride1(
Chris@69 105 const opus_int16 *in, /* I input signal */
Chris@69 106 const opus_int32 *B_Q28, /* I MA coefficients [3] */
Chris@69 107 const opus_int32 *A_Q28, /* I AR coefficients [2] */
Chris@69 108 opus_int32 *S, /* I/O State vector [2] */
Chris@69 109 opus_int16 *out, /* O output signal */
Chris@69 110 const opus_int32 len /* I signal length (must be even) */
Chris@69 111 );
Chris@69 112
Chris@69 113 void silk_biquad_alt_stride2_c(
Chris@69 114 const opus_int16 *in, /* I input signal */
Chris@69 115 const opus_int32 *B_Q28, /* I MA coefficients [3] */
Chris@69 116 const opus_int32 *A_Q28, /* I AR coefficients [2] */
Chris@69 117 opus_int32 *S, /* I/O State vector [4] */
Chris@69 118 opus_int16 *out, /* O output signal */
Chris@69 119 const opus_int32 len /* I signal length (must be even) */
Chris@69 120 );
Chris@69 121
Chris@69 122 /* Variable order MA prediction error filter. */
Chris@69 123 void silk_LPC_analysis_filter(
Chris@69 124 opus_int16 *out, /* O Output signal */
Chris@69 125 const opus_int16 *in, /* I Input signal */
Chris@69 126 const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
Chris@69 127 const opus_int32 len, /* I Signal length */
Chris@69 128 const opus_int32 d, /* I Filter order */
Chris@69 129 int arch /* I Run-time architecture */
Chris@69 130 );
Chris@69 131
Chris@69 132 /* Chirp (bandwidth expand) LP AR filter */
Chris@69 133 void silk_bwexpander(
Chris@69 134 opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */
Chris@69 135 const opus_int d, /* I Length of ar */
Chris@69 136 opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */
Chris@69 137 );
Chris@69 138
Chris@69 139 /* Chirp (bandwidth expand) LP AR filter */
Chris@69 140 void silk_bwexpander_32(
Chris@69 141 opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */
Chris@69 142 const opus_int d, /* I Length of ar */
Chris@69 143 opus_int32 chirp_Q16 /* I Chirp factor in Q16 */
Chris@69 144 );
Chris@69 145
Chris@69 146 /* Compute inverse of LPC prediction gain, and */
Chris@69 147 /* test if LPC coefficients are stable (all poles within unit circle) */
Chris@69 148 opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */
Chris@69 149 const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
Chris@69 150 const opus_int order /* I Prediction order */
Chris@69 151 );
Chris@69 152
Chris@69 153 /* Split signal in two decimated bands using first-order allpass filters */
Chris@69 154 void silk_ana_filt_bank_1(
Chris@69 155 const opus_int16 *in, /* I Input signal [N] */
Chris@69 156 opus_int32 *S, /* I/O State vector [2] */
Chris@69 157 opus_int16 *outL, /* O Low band [N/2] */
Chris@69 158 opus_int16 *outH, /* O High band [N/2] */
Chris@69 159 const opus_int32 N /* I Number of input samples */
Chris@69 160 );
Chris@69 161
Chris@69 162 #if !defined(OVERRIDE_silk_biquad_alt_stride2)
Chris@69 163 #define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len))
Chris@69 164 #endif
Chris@69 165
Chris@69 166 #if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
Chris@69 167 #define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order))
Chris@69 168 #endif
Chris@69 169
Chris@69 170 /********************************************************************/
Chris@69 171 /* SCALAR FUNCTIONS */
Chris@69 172 /********************************************************************/
Chris@69 173
Chris@69 174 /* Approximation of 128 * log2() (exact inverse of approx 2^() below) */
Chris@69 175 /* Convert input to a log scale */
Chris@69 176 opus_int32 silk_lin2log(
Chris@69 177 const opus_int32 inLin /* I input in linear scale */
Chris@69 178 );
Chris@69 179
Chris@69 180 /* Approximation of a sigmoid function */
Chris@69 181 opus_int silk_sigm_Q15(
Chris@69 182 opus_int in_Q5 /* I */
Chris@69 183 );
Chris@69 184
Chris@69 185 /* Approximation of 2^() (exact inverse of approx log2() above) */
Chris@69 186 /* Convert input to a linear scale */
Chris@69 187 opus_int32 silk_log2lin(
Chris@69 188 const opus_int32 inLog_Q7 /* I input on log scale */
Chris@69 189 );
Chris@69 190
Chris@69 191 /* Compute number of bits to right shift the sum of squares of a vector */
Chris@69 192 /* of int16s to make it fit in an int32 */
Chris@69 193 void silk_sum_sqr_shift(
Chris@69 194 opus_int32 *energy, /* O Energy of x, after shifting to the right */
Chris@69 195 opus_int *shift, /* O Number of bits right shift applied to energy */
Chris@69 196 const opus_int16 *x, /* I Input vector */
Chris@69 197 opus_int len /* I Length of input vector */
Chris@69 198 );
Chris@69 199
Chris@69 200 /* Calculates the reflection coefficients from the correlation sequence */
Chris@69 201 /* Faster than schur64(), but much less accurate. */
Chris@69 202 /* uses SMLAWB(), requiring armv5E and higher. */
Chris@69 203 opus_int32 silk_schur( /* O Returns residual energy */
Chris@69 204 opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */
Chris@69 205 const opus_int32 *c, /* I correlations [order+1] */
Chris@69 206 const opus_int32 order /* I prediction order */
Chris@69 207 );
Chris@69 208
Chris@69 209 /* Calculates the reflection coefficients from the correlation sequence */
Chris@69 210 /* Slower than schur(), but more accurate. */
Chris@69 211 /* Uses SMULL(), available on armv4 */
Chris@69 212 opus_int32 silk_schur64( /* O returns residual energy */
Chris@69 213 opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */
Chris@69 214 const opus_int32 c[], /* I Correlations [order+1] */
Chris@69 215 opus_int32 order /* I Prediction order */
Chris@69 216 );
Chris@69 217
Chris@69 218 /* Step up function, converts reflection coefficients to prediction coefficients */
Chris@69 219 void silk_k2a(
Chris@69 220 opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
Chris@69 221 const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */
Chris@69 222 const opus_int32 order /* I Prediction order */
Chris@69 223 );
Chris@69 224
Chris@69 225 /* Step up function, converts reflection coefficients to prediction coefficients */
Chris@69 226 void silk_k2a_Q16(
Chris@69 227 opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
Chris@69 228 const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */
Chris@69 229 const opus_int32 order /* I Prediction order */
Chris@69 230 );
Chris@69 231
Chris@69 232 /* Apply sine window to signal vector. */
Chris@69 233 /* Window types: */
Chris@69 234 /* 1 -> sine window from 0 to pi/2 */
Chris@69 235 /* 2 -> sine window from pi/2 to pi */
Chris@69 236 /* every other sample of window is linearly interpolated, for speed */
Chris@69 237 void silk_apply_sine_window(
Chris@69 238 opus_int16 px_win[], /* O Pointer to windowed signal */
Chris@69 239 const opus_int16 px[], /* I Pointer to input signal */
Chris@69 240 const opus_int win_type, /* I Selects a window type */
Chris@69 241 const opus_int length /* I Window length, multiple of 4 */
Chris@69 242 );
Chris@69 243
Chris@69 244 /* Compute autocorrelation */
Chris@69 245 void silk_autocorr(
Chris@69 246 opus_int32 *results, /* O Result (length correlationCount) */
Chris@69 247 opus_int *scale, /* O Scaling of the correlation vector */
Chris@69 248 const opus_int16 *inputData, /* I Input data to correlate */
Chris@69 249 const opus_int inputDataSize, /* I Length of input */
Chris@69 250 const opus_int correlationCount, /* I Number of correlation taps to compute */
Chris@69 251 int arch /* I Run-time architecture */
Chris@69 252 );
Chris@69 253
Chris@69 254 void silk_decode_pitch(
Chris@69 255 opus_int16 lagIndex, /* I */
Chris@69 256 opus_int8 contourIndex, /* I (passed by value, so it cannot be an output) */
Chris@69 257 opus_int pitch_lags[], /* O 4 pitch values */
Chris@69 258 const opus_int Fs_kHz, /* I sampling frequency (kHz) */
Chris@69 259 const opus_int nb_subfr /* I number of sub frames */
Chris@69 260 );
Chris@69 261
Chris@69 262 opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
Chris@69 263 const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
Chris@69 264 opus_int *pitch_out, /* O 4 pitch lag values */
Chris@69 265 opus_int16 *lagIndex, /* O Lag Index */
Chris@69 266 opus_int8 *contourIndex, /* O Pitch contour Index */
Chris@69 267 opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
Chris@69 268 opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */
Chris@69 269 const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
Chris@69 270 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
Chris@69 271 const opus_int Fs_kHz, /* I Sample frequency (kHz) */
Chris@69 272 const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
Chris@69 273 const opus_int nb_subfr, /* I number of 5 ms subframes */
Chris@69 274 int arch /* I Run-time architecture */
Chris@69 275 );
Chris@69 276
Chris@69 277 /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */
Chris@69 278 /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
Chris@69 279 void silk_A2NLSF(
Chris@69 280 opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
Chris@69 281 opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */
Chris@69 282 const opus_int d /* I Filter order (must be even) */
Chris@69 283 );
Chris@69 284
Chris@69 285 /* compute whitening filter coefficients from normalized line spectral frequencies */
Chris@69 286 void silk_NLSF2A(
Chris@69 287 opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
Chris@69 288 const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
Chris@69 289 const opus_int d, /* I filter order (should be even) */
Chris@69 290 int arch /* I Run-time architecture */
Chris@69 291 );
Chris@69 292
Chris@69 293 /* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
Chris@69 294 void silk_LPC_fit(
Chris@69 295 opus_int16 *a_QOUT, /* O Output signal */
Chris@69 296 opus_int32 *a_QIN, /* I/O Input signal */
Chris@69 297 const opus_int QOUT, /* I Output Q domain */
Chris@69 298 const opus_int QIN, /* I Input Q domain */
Chris@69 299 const opus_int d /* I Filter order */
Chris@69 300 );
Chris@69 301
Chris@69 302 void silk_insertion_sort_increasing(
Chris@69 303 opus_int32 *a, /* I/O Unsorted / Sorted vector */
Chris@69 304 opus_int *idx, /* O Index vector for the sorted elements */
Chris@69 305 const opus_int L, /* I Vector length */
Chris@69 306 const opus_int K /* I Number of correctly sorted positions */
Chris@69 307 );
Chris@69 308
Chris@69 309 void silk_insertion_sort_decreasing_int16(
Chris@69 310 opus_int16 *a, /* I/O Unsorted / Sorted vector */
Chris@69 311 opus_int *idx, /* O Index vector for the sorted elements */
Chris@69 312 const opus_int L, /* I Vector length */
Chris@69 313 const opus_int K /* I Number of correctly sorted positions */
Chris@69 314 );
Chris@69 315
Chris@69 316 void silk_insertion_sort_increasing_all_values_int16(
Chris@69 317 opus_int16 *a, /* I/O Unsorted / Sorted vector */
Chris@69 318 const opus_int L /* I Vector length */
Chris@69 319 );
Chris@69 320
Chris@69 321 /* NLSF stabilizer, for a single input data vector */
Chris@69 322 void silk_NLSF_stabilize(
Chris@69 323 opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */
Chris@69 324 const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */
Chris@69 325 const opus_int L /* I Number of NLSF parameters in the input vector */
Chris@69 326 );
Chris@69 327
Chris@69 328 /* Laroia low complexity NLSF weights */
Chris@69 329 void silk_NLSF_VQ_weights_laroia(
Chris@69 330 opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */
Chris@69 331 const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */
Chris@69 332 const opus_int D /* I Input vector dimension (even) */
Chris@69 333 );
Chris@69 334
Chris@69 335 /* Compute reflection coefficients from input signal */
Chris@69 336 void silk_burg_modified_c(
Chris@69 337 opus_int32 *res_nrg, /* O Residual energy */
Chris@69 338 opus_int *res_nrg_Q, /* O Residual energy Q value */
Chris@69 339 opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
Chris@69 340 const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */
Chris@69 341 const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
Chris@69 342 const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
Chris@69 343 const opus_int nb_subfr, /* I Number of subframes stacked in x */
Chris@69 344 const opus_int D, /* I Order */
Chris@69 345 int arch /* I Run-time architecture */
Chris@69 346 );
Chris@69 347
Chris@69 348 /* Copy and multiply a vector by a constant */
Chris@69 349 void silk_scale_copy_vector16(
Chris@69 350 opus_int16 *data_out,
Chris@69 351 const opus_int16 *data_in,
Chris@69 352 opus_int32 gain_Q16, /* I Gain in Q16 */
Chris@69 353 const opus_int dataSize /* I Length */
Chris@69 354 );
Chris@69 355
Chris@69 356 /* Some of the LTP-related functions require Q26 to work. */
Chris@69 357 void silk_scale_vector32_Q26_lshift_18(
Chris@69 358 opus_int32 *data1, /* I/O Q0/Q18 */
Chris@69 359 opus_int32 gain_Q26, /* I Q26 */
Chris@69 360 opus_int dataSize /* I length */
Chris@69 361 );
Chris@69 362
Chris@69 363 /********************************************************************/
Chris@69 364 /* INLINE ARM MATH */
Chris@69 365 /********************************************************************/
Chris@69 366
Chris@69 367 /* return sum( inVec1[i] * inVec2[i] ) */
Chris@69 368
Chris@69 369 opus_int32 silk_inner_prod_aligned(
Chris@69 370 const opus_int16 *const inVec1, /* I input vector 1 */
Chris@69 371 const opus_int16 *const inVec2, /* I input vector 2 */
Chris@69 372 const opus_int len, /* I vector lengths */
Chris@69 373 int arch /* I Run-time architecture */
Chris@69 374 );
Chris@69 375
Chris@69 376
Chris@69 377 opus_int32 silk_inner_prod_aligned_scale(
Chris@69 378 const opus_int16 *const inVec1, /* I input vector 1 */
Chris@69 379 const opus_int16 *const inVec2, /* I input vector 2 */
Chris@69 380 const opus_int scale, /* I number of bits to shift */
Chris@69 381 const opus_int len /* I vector lengths */
Chris@69 382 );
Chris@69 383
Chris@69 384 opus_int64 silk_inner_prod16_aligned_64_c(
Chris@69 385 const opus_int16 *inVec1, /* I input vector 1 */
Chris@69 386 const opus_int16 *inVec2, /* I input vector 2 */
Chris@69 387 const opus_int len /* I vector lengths */
Chris@69 388 );
Chris@69 389
Chris@69 390 /********************************************************************/
Chris@69 391 /* MACROS */
Chris@69 392 /********************************************************************/
Chris@69 393
Chris@69 394 /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
Chris@69 395 left. Output is 32bit int.
Chris@69 396 Note: contemporary compilers recognize the C expression below and
Chris@69 397 compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
Chris@69 398 static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
Chris@69 399 { /* NOTE(review): the shift counts below stay under 32 only for -32 < rot < 32 */
Chris@69 400 opus_uint32 x = (opus_uint32) a32; /* rotate the unsigned bit pattern of a32 */
Chris@69 401 opus_uint32 r = (opus_uint32) rot; /* right-rotate count, used only when rot > 0 */
Chris@69 402 opus_uint32 m = (opus_uint32) -rot; /* left-rotate count, used only when rot < 0 */
Chris@69 403 if( rot == 0 ) {
Chris@69 404 return a32; /* handled separately: either branch below would shift by 32 bits */
Chris@69 405 } else if( rot < 0 ) {
Chris@69 406 return (opus_int32) ((x << m) | (x >> (32 - m))); /* negative rot: rotate left by -rot */
Chris@69 407 } else {
Chris@69 408 return (opus_int32) ((x << (32 - r)) | (x >> r)); /* positive rot: rotate right by rot */
Chris@69 409 }
Chris@69 410 }
Chris@69 411
Chris@69 412 /* Allocate opus_int16 aligned to 4-byte memory address */
Chris@69 413 #if EMBEDDED_ARM
Chris@69 414 #define silk_DWORD_ALIGN __attribute__((aligned(4)))
Chris@69 415 #else
Chris@69 416 #define silk_DWORD_ALIGN
Chris@69 417 #endif
Chris@69 418
Chris@69 419 /* Useful Macros that can be adjusted to other platforms */
Chris@69 420 #define silk_memcpy(dest, src, size) memcpy((dest), (src), (size))
Chris@69 421 #define silk_memset(dest, src, size) memset((dest), (src), (size)) /* 'src' is forwarded as memset's fill value, not a source pointer */
Chris@69 422 #define silk_memmove(dest, src, size) memmove((dest), (src), (size))
Chris@69 423
Chris@69 424 /* Fixed point macros */
Chris@69 425
Chris@69 426 /* (a32 * b32) output have to be 32bit int */
Chris@69 427 #define silk_MUL(a32, b32) ((a32) * (b32))
Chris@69 428
Chris@69 429 /* (a32 * b32) output have to be 32bit uint */
Chris@69 430 #define silk_MUL_uint(a32, b32) silk_MUL(a32, b32)
Chris@69 431
Chris@69 432 /* a32 + (b32 * c32) output have to be 32bit int */
Chris@69 433 #define silk_MLA(a32, b32, c32) silk_ADD32((a32),((b32) * (c32)))
Chris@69 434
Chris@69 435 /* a32 + (b32 * c32) output have to be 32bit uint */
Chris@69 436 #define silk_MLA_uint(a32, b32, c32) silk_MLA(a32, b32, c32)
Chris@69 437
Chris@69 438 /* ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */
Chris@69 439 #define silk_SMULTT(a32, b32) (((a32) >> 16) * ((b32) >> 16))
Chris@69 440
Chris@69 441 /* a32 + ((b32 >> 16) * (c32 >> 16)) output has to be 32bit int */
Chris@69 442 #define silk_SMLATT(a32, b32, c32) silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
Chris@69 443
Chris@69 444 #define silk_SMLALBB(a64, b16, c16) silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
Chris@69 445
Chris@69 446 /* (a32 * b32) */
Chris@69 447 #define silk_SMULL(a32, b32) ((opus_int64)(a32) * /*(opus_int64)*/(b32))
Chris@69 448
Chris@69 449 /* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
Chris@69 450 (just standard two's complement implementation-specific behaviour) */
Chris@69 451 #define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
Chris@69 452 /* Subtracts two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
Chris@69 453 (just standard two's complement implementation-specific behaviour) */
Chris@69 454 #define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
Chris@69 455
Chris@69 456 /* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
Chris@69 457 #define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
Chris@69 458 #define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
Chris@69 459
Chris@69 460 #define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16)))
Chris@69 461 #define silk_DIV32(a32, b32) ((opus_int32)((a32) / (b32)))
Chris@69 462
Chris@69 463 /* These macros enable checking for overflow in silk_API_Debug.h */
Chris@69 464 #define silk_ADD16(a, b) ((a) + (b))
Chris@69 465 #define silk_ADD32(a, b) ((a) + (b))
Chris@69 466 #define silk_ADD64(a, b) ((a) + (b))
Chris@69 467
Chris@69 468 #define silk_SUB16(a, b) ((a) - (b))
Chris@69 469 #define silk_SUB32(a, b) ((a) - (b))
Chris@69 470 #define silk_SUB64(a, b) ((a) - (b))
Chris@69 471
Chris@69 472 #define silk_SAT8(a) ((a) > silk_int8_MAX ? silk_int8_MAX : \
Chris@69 473 ((a) < silk_int8_MIN ? silk_int8_MIN : (a)))
Chris@69 474 #define silk_SAT16(a) ((a) > silk_int16_MAX ? silk_int16_MAX : \
Chris@69 475 ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
Chris@69 476 #define silk_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : \
Chris@69 477 ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
Chris@69 478
Chris@69 479 #define silk_CHECK_FIT8(a) (a)
Chris@69 480 #define silk_CHECK_FIT16(a) (a)
Chris@69 481 #define silk_CHECK_FIT32(a) (a)
Chris@69 482
Chris@69 483 #define silk_ADD_SAT16(a, b) (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) )
Chris@69 484 #define silk_ADD_SAT64(a, b) ((((a) + (b)) & 0x8000000000000000LL) == 0 ? \
Chris@69 485 ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
Chris@69 486 ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )
Chris@69 487
Chris@69 488 #define silk_SUB_SAT16(a, b) (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) )
Chris@69 489 #define silk_SUB_SAT64(a, b) ((((a)-(b)) & 0x8000000000000000LL) == 0 ? \
Chris@69 490 (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
Chris@69 491 ((((a)^0x8000000000000000LL) & (b) & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
Chris@69 492
Chris@69 493 /* Saturation for positive input values */
Chris@69 494 #define silk_POS_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
Chris@69 495
Chris@69 496 /* Add with saturation for positive input values */
Chris@69 497 #define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
Chris@69 498 #define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
Chris@69 499 #define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
Chris@69 500
Chris@69 501 #define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
Chris@69 502 #define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
Chris@69 503 #define silk_LSHIFT32(a, shift) ((opus_int32)((opus_uint32)(a)<<(shift))) /* shift >= 0, shift < 32 */
Chris@69 504 #define silk_LSHIFT64(a, shift) ((opus_int64)((opus_uint64)(a)<<(shift))) /* shift >= 0, shift < 64 */
Chris@69 505 #define silk_LSHIFT(a, shift) silk_LSHIFT32(a, shift) /* shift >= 0, shift < 32 */
Chris@69 506
Chris@69 507 #define silk_RSHIFT8(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 8 */
Chris@69 508 #define silk_RSHIFT16(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 16 */
Chris@69 509 #define silk_RSHIFT32(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 32 */
Chris@69 510 #define silk_RSHIFT64(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 64 */
Chris@69 511 #define silk_RSHIFT(a, shift) silk_RSHIFT32(a, shift) /* shift >= 0, shift < 32 */
Chris@69 512
Chris@69 513 /* saturates before shifting */
Chris@69 514 #define silk_LSHIFT_SAT32(a, shift) (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
Chris@69 515 silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
Chris@69 516
Chris@69 517 #define silk_LSHIFT_ovflw(a, shift) ((opus_int32)((opus_uint32)(a) << (shift))) /* shift >= 0, allowed to overflow */
Chris@69 518 #define silk_LSHIFT_uint(a, shift) ((a) << (shift)) /* shift >= 0 */
Chris@69 519 #define silk_RSHIFT_uint(a, shift) ((a) >> (shift)) /* shift >= 0 */
Chris@69 520
Chris@69 521 #define silk_ADD_LSHIFT(a, b, shift) ((a) + silk_LSHIFT((b), (shift))) /* shift >= 0 */
Chris@69 522 #define silk_ADD_LSHIFT32(a, b, shift) silk_ADD32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 523 #define silk_ADD_LSHIFT_uint(a, b, shift) ((a) + silk_LSHIFT_uint((b), (shift))) /* shift >= 0 */
Chris@69 524 #define silk_ADD_RSHIFT(a, b, shift) ((a) + silk_RSHIFT((b), (shift))) /* shift >= 0 */
Chris@69 525 #define silk_ADD_RSHIFT32(a, b, shift) silk_ADD32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 526 #define silk_ADD_RSHIFT_uint(a, b, shift) ((a) + silk_RSHIFT_uint((b), (shift))) /* shift >= 0 */
Chris@69 527 #define silk_SUB_LSHIFT32(a, b, shift) silk_SUB32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 528 #define silk_SUB_RSHIFT32(a, b, shift) silk_SUB32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */
Chris@69 529
Chris@69 530 /* Requires that shift > 0 */
Chris@69 531 #define silk_RSHIFT_ROUND(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
Chris@69 532 #define silk_RSHIFT_ROUND64(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
Chris@69 533
Chris@69 534 /* Number of rightshift required to fit the multiplication */
Chris@69 535 #define silk_NSHIFT_MUL_32_32(a, b) ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
Chris@69 536 #define silk_NSHIFT_MUL_16_16(a, b) ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
Chris@69 537
Chris@69 538
Chris@69 539 #define silk_min(a, b) (((a) < (b)) ? (a) : (b))
Chris@69 540 #define silk_max(a, b) (((a) > (b)) ? (a) : (b))
Chris@69 541
Chris@69 542 /* Macro to convert floating-point constants to fixed-point */
Chris@69 543 #define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5)) /* NOTE(review): the +0.5 rounds to nearest only for C >= 0; the cast truncates toward zero, so a negative C generally comes out one higher (e.g. a scaled value of -3.0 yields -2) */
Chris@69 544
Chris@69 545 /* silk_min() versions with typecast in the function call */
Chris@69 546 static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
Chris@69 547 {
Chris@69 548 return (((a) < (b)) ? (a) : (b)); /* smaller of a and b; arguments are converted to opus_int at the call */
Chris@69 549 }
Chris@69 550 static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
Chris@69 551 {
Chris@69 552 return (((a) < (b)) ? (a) : (b)); /* smaller of a and b; arguments are converted to opus_int16 at the call */
Chris@69 553 }
Chris@69 554 static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
Chris@69 555 {
Chris@69 556 return (((a) < (b)) ? (a) : (b)); /* smaller of a and b; arguments are converted to opus_int32 at the call */
Chris@69 557 }
Chris@69 558 static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
Chris@69 559 {
Chris@69 560 return (((a) < (b)) ? (a) : (b)); /* smaller of a and b; arguments are converted to opus_int64 at the call */
Chris@69 561 }
Chris@69 562
Chris@69 563 /* silk_max() versions with typecast in the function call */
Chris@69 564 static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
Chris@69 565 {
Chris@69 566 return (((a) > (b)) ? (a) : (b)); /* larger of a and b; arguments are converted to opus_int at the call */
Chris@69 567 }
Chris@69 568 static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
Chris@69 569 {
Chris@69 570 return (((a) > (b)) ? (a) : (b)); /* larger of a and b; arguments are converted to opus_int16 at the call */
Chris@69 571 }
Chris@69 572 static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
Chris@69 573 {
Chris@69 574 return (((a) > (b)) ? (a) : (b)); /* larger of a and b; arguments are converted to opus_int32 at the call */
Chris@69 575 }
Chris@69 576 static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
Chris@69 577 {
Chris@69 578 return (((a) > (b)) ? (a) : (b)); /* larger of a and b; arguments are converted to opus_int64 at the call */
Chris@69 579 }
Chris@69 580
Chris@69 581 #define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
Chris@69 582 : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
Chris@69 583
Chris@69 584 #define silk_LIMIT_int silk_LIMIT
Chris@69 585 #define silk_LIMIT_16 silk_LIMIT
Chris@69 586 #define silk_LIMIT_32 silk_LIMIT
Chris@69 587
Chris@69 588 #define silk_abs(a) (((a) > 0) ? (a) : -(a)) /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
Chris@69 589 #define silk_abs_int(a) (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
Chris@69 590 #define silk_abs_int32(a) (((a) ^ ((a) >> 31)) - ((a) >> 31))
Chris@69 591 #define silk_abs_int64(a) (((a) > 0) ? (a) : -(a))
Chris@69 592
Chris@69 593 #define silk_sign(a) ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))
Chris@69 594
Chris@69 595 /* PSEUDO-RANDOM GENERATOR */
Chris@69 596 /* Make sure to store the result as the seed for the next call (also in between */
Chris@69 597 /* frames), otherwise result won't be random at all. When only using some of the */
Chris@69 598 /* bits, take the most significant bits by right-shifting. */
Chris@69 599 #define RAND_MULTIPLIER 196314165
Chris@69 600 #define RAND_INCREMENT 907633515
Chris@69 601 #define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
Chris@69 602
Chris@69 603 /* Add some multiplication functions that can be easily mapped to ARM. */
Chris@69 604
Chris@69 605 /* silk_SMMUL: Signed top word multiply.
Chris@69 606 ARMv6 2 instruction cycles.
Chris@69 607 ARMv3M+ 3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
Chris@69 608 /*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
Chris@69 609 /* the following seems faster on x86 */
Chris@69 610 #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
Chris@69 611
Chris@69 612 #if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
Chris@69 613 #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
Chris@69 614 ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
Chris@69 615
Chris@69 616 #define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
Chris@69 617 ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
Chris@69 618 #endif
Chris@69 619
Chris@69 620 #include "Inlines.h"
Chris@69 621 #include "MacroCount.h"
Chris@69 622 #include "MacroDebug.h"
Chris@69 623
Chris@69 624 #ifdef OPUS_ARM_INLINE_ASM
Chris@69 625 #include "arm/SigProc_FIX_armv4.h"
Chris@69 626 #endif
Chris@69 627
Chris@69 628 #ifdef OPUS_ARM_INLINE_EDSP
Chris@69 629 #include "arm/SigProc_FIX_armv5e.h"
Chris@69 630 #endif
Chris@69 631
Chris@69 632 #if defined(MIPSr1_ASM)
Chris@69 633 #include "mips/sigproc_fix_mipsr1.h"
Chris@69 634 #endif
Chris@69 635
Chris@69 636
Chris@69 637 #ifdef __cplusplus
Chris@69 638 }
Chris@69 639 #endif
Chris@69 640
Chris@69 641 #endif /* SILK_SIGPROC_FIX_H */