cannam@154: /* Copyright (c) 2014, Cisco Systems, INC cannam@154: Written by XiangMingZhu WeiZhou MinPeng YanWang cannam@154: cannam@154: Redistribution and use in source and binary forms, with or without cannam@154: modification, are permitted provided that the following conditions cannam@154: are met: cannam@154: cannam@154: - Redistributions of source code must retain the above copyright cannam@154: notice, this list of conditions and the following disclaimer. cannam@154: cannam@154: - Redistributions in binary form must reproduce the above copyright cannam@154: notice, this list of conditions and the following disclaimer in the cannam@154: documentation and/or other materials provided with the distribution. cannam@154: cannam@154: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS cannam@154: ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT cannam@154: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR cannam@154: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER cannam@154: OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, cannam@154: EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, cannam@154: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR cannam@154: PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF cannam@154: LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING cannam@154: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS cannam@154: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cannam@154: */ cannam@154: cannam@154: #ifdef HAVE_CONFIG_H cannam@154: #include "config.h" cannam@154: #endif cannam@154: cannam@154: #include cannam@154: #include cannam@154: #include cannam@154: cannam@154: #include "SigProc_FIX.h" cannam@154: #include "define.h" cannam@154: #include "tuning_parameters.h" cannam@154: #include "pitch.h" cannam@154: #include "celt/x86/x86cpu.h" cannam@154: cannam@154: #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ cannam@154: cannam@154: #define QA 25 cannam@154: #define N_BITS_HEAD_ROOM 2 cannam@154: #define MIN_RSHIFTS -16 cannam@154: #define MAX_RSHIFTS (32 - QA) cannam@154: cannam@154: /* Compute reflection coefficients from input signal */ cannam@154: void silk_burg_modified_sse4_1( cannam@154: opus_int32 *res_nrg, /* O Residual energy */ cannam@154: opus_int *res_nrg_Q, /* O Residual energy Q value */ cannam@154: opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ cannam@154: const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ cannam@154: const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ cannam@154: const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ cannam@154: const opus_int nb_subfr, /* I Number of subframes stacked in x */ cannam@154: const opus_int D, /* I Order */ cannam@154: int arch /* I Run-time architecture */ cannam@154: ) cannam@154: { cannam@154: opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; cannam@154: opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; cannam@154: const opus_int16 *x_ptr; cannam@154: opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; cannam@154: opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; cannam@154: opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; cannam@154: opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; cannam@154: opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; cannam@154: opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; cannam@154: cannam@154: __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; cannam@154: __m128i CONST1 = _mm_set1_epi32(1); cannam@154: cannam@154: celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); cannam@154: cannam@154: /* Compute autocorrelations, added over subframes */ cannam@154: silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); cannam@154: if( rshifts > MAX_RSHIFTS ) { cannam@154: C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); cannam@154: silk_assert( C0 > 0 ); cannam@154: rshifts = MAX_RSHIFTS; cannam@154: } else { cannam@154: lz = silk_CLZ32( C0 ) - 1; cannam@154: rshifts_extra = N_BITS_HEAD_ROOM - lz; cannam@154: if( rshifts_extra > 0 ) { cannam@154: rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); cannam@154: C0 = silk_RSHIFT32( C0, rshifts_extra ); cannam@154: } else { cannam@154: rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); cannam@154: C0 = silk_LSHIFT32( C0, -rshifts_extra ); cannam@154: } cannam@154: rshifts += rshifts_extra; cannam@154: } cannam@154: CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ cannam@154: silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); cannam@154: if( rshifts > 0 ) { cannam@154: for( s = 0; s < nb_subfr; s++ ) { cannam@154: x_ptr = x + s * subfr_length; cannam@154: for( n = 1; n < D + 1; n++ ) { cannam@154: C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( cannam@154: silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); cannam@154: } cannam@154: } cannam@154: } else { cannam@154: for( s = 0; s < nb_subfr; s++ ) { cannam@154: int i; cannam@154: opus_int32 d; cannam@154: x_ptr = x + s * subfr_length; cannam@154: celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); cannam@154: for( n = 1; n < D + 1; n++ ) { cannam@154: for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) cannam@154: d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); cannam@154: xcorr[ n - 1 ] += d; cannam@154: } cannam@154: for( n = 1; n < D + 1; n++ ) { cannam@154: C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); cannam@154: } cannam@154: } cannam@154: } cannam@154: silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); cannam@154: cannam@154: /* Initialize */ cannam@154: CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ cannam@154: cannam@154: invGain_Q30 = (opus_int32)1 << 30; cannam@154: reached_max_gain = 0; cannam@154: for( n = 0; n < D; n++ ) { cannam@154: /* Update first row of correlation matrix (without first element) */ cannam@154: /* Update last row of correlation matrix (without last element, stored in reversed order) */ cannam@154: /* Update C * Af */ cannam@154: /* Update C * flipud(Af) (stored in reversed order) */ cannam@154: if( rshifts > -2 ) { cannam@154: for( s = 0; s < nb_subfr; s++ ) { cannam@154: x_ptr = x + s * subfr_length; cannam@154: x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ cannam@154: x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ cannam@154: tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ cannam@154: tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ cannam@154: for( k = 0; k < n; k++ ) { cannam@154: C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ cannam@154: C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ cannam@154: Atmp_QA = Af_QA[ k ]; cannam@154: tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ cannam@154: tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ cannam@154: } cannam@154: tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ cannam@154: tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ cannam@154: for( k = 0; k <= n; k++ ) { cannam@154: CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ cannam@154: CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ cannam@154: } cannam@154: } cannam@154: } else { cannam@154: for( s = 0; s < nb_subfr; s++ ) { cannam@154: x_ptr = x + s * subfr_length; cannam@154: x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ cannam@154: x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ cannam@154: tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ cannam@154: tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ cannam@154: cannam@154: X1_3210 = _mm_set1_epi32( x1 ); cannam@154: X2_3210 = _mm_set1_epi32( x2 ); cannam@154: TMP1_3210 = _mm_setzero_si128(); cannam@154: TMP2_3210 = _mm_setzero_si128(); cannam@154: for( k = 0; k < n - 3; k += 4 ) { cannam@154: PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] ); cannam@154: SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] ); cannam@154: FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] ); cannam@154: PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); cannam@154: LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] ); cannam@154: ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] ); cannam@154: cannam@154: T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 ); cannam@154: T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 ); cannam@154: cannam@154: ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 ); cannam@154: ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 ); cannam@154: ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 ); cannam@154: cannam@154: FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 ); cannam@154: LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 ); cannam@154: cannam@154: PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 ); cannam@154: SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 ); cannam@154: cannam@154: _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 ); cannam@154: _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 ); cannam@154: cannam@154: TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 ); cannam@154: TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 ); cannam@154: } cannam@154: cannam@154: TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) ); cannam@154: TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) ); cannam@154: TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) ); cannam@154: TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) ); cannam@154: cannam@154: tmp1 += _mm_cvtsi128_si32( TMP1_3210 ); cannam@154: tmp2 += _mm_cvtsi128_si32( TMP2_3210 ); cannam@154: cannam@154: for( ; k < n; k++ ) { cannam@154: C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ cannam@154: C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ cannam@154: Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ cannam@154: tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ cannam@154: tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ cannam@154: } cannam@154: cannam@154: tmp1 = -tmp1; /* Q17 */ cannam@154: tmp2 = -tmp2; /* Q17 */ cannam@154: cannam@154: { cannam@154: __m128i xmm_tmp1, xmm_tmp2; cannam@154: __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; cannam@154: __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; cannam@154: cannam@154: xmm_tmp1 = _mm_set1_epi32( tmp1 ); cannam@154: xmm_tmp2 = _mm_set1_epi32( tmp2 ); cannam@154: cannam@154: for( k = 0; k <= n - 3; k += 4 ) { cannam@154: xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] ); cannam@154: xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] ); cannam@154: cannam@154: xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) ); cannam@154: cannam@154: xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 ); cannam@154: xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 ); cannam@154: cannam@154: /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ cannam@154: xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); cannam@154: xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); cannam@154: cannam@154: xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 ); cannam@154: xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 ); cannam@154: xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 ); cannam@154: xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 ); cannam@154: cannam@154: xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 ); cannam@154: xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 ); cannam@154: xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 ); cannam@154: xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 ); cannam@154: cannam@154: xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC ); cannam@154: xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC ); cannam@154: cannam@154: X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] ); cannam@154: PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] ); cannam@154: cannam@154: X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 ); cannam@154: PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 ); cannam@154: cannam@154: _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 ); cannam@154: _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 ); cannam@154: } cannam@154: cannam@154: for( ; k <= n; k++ ) { cannam@154: CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, cannam@154: silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ cannam@154: CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, cannam@154: silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ cannam@154: } cannam@154: } cannam@154: } cannam@154: } cannam@154: cannam@154: /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ cannam@154: tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ cannam@154: tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ cannam@154: num = 0; /* Q( -rshifts ) */ cannam@154: nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ cannam@154: for( k = 0; k < n; k++ ) { cannam@154: Atmp_QA = Af_QA[ k ]; cannam@154: lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; cannam@154: lz = silk_min( 32 - QA, lz ); cannam@154: Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ cannam@154: cannam@154: tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ cannam@154: tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ cannam@154: num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ cannam@154: nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), cannam@154: Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ cannam@154: } cannam@154: CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ cannam@154: CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ cannam@154: num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ cannam@154: num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ cannam@154: cannam@154: /* Calculate the next order reflection (parcor) coefficient */ cannam@154: if( silk_abs( num ) < nrg ) { cannam@154: rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); cannam@154: } else { cannam@154: rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; cannam@154: } cannam@154: cannam@154: /* Update inverse prediction gain */ cannam@154: tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); cannam@154: tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); cannam@154: if( tmp1 <= minInvGain_Q30 ) { cannam@154: /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ cannam@154: tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ cannam@154: rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ cannam@154: if( rc_Q31 > 0 ) { cannam@154: /* Newton-Raphson iteration */ cannam@154: rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ cannam@154: rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ cannam@154: if( num < 0 ) { cannam@154: /* Ensure adjusted reflection coefficients has the original sign */ cannam@154: rc_Q31 = -rc_Q31; cannam@154: } cannam@154: } cannam@154: invGain_Q30 = minInvGain_Q30; cannam@154: reached_max_gain = 1; cannam@154: } else { cannam@154: invGain_Q30 = tmp1; cannam@154: } cannam@154: cannam@154: /* Update the AR coefficients */ cannam@154: for( k = 0; k < (n + 1) >> 1; k++ ) { cannam@154: tmp1 = Af_QA[ k ]; /* QA */ cannam@154: tmp2 = Af_QA[ n - k - 1 ]; /* QA */ cannam@154: Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ cannam@154: Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ cannam@154: } cannam@154: Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ cannam@154: cannam@154: if( reached_max_gain ) { cannam@154: /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ cannam@154: for( k = n + 1; k < D; k++ ) { cannam@154: Af_QA[ k ] = 0; cannam@154: } cannam@154: break; cannam@154: } cannam@154: cannam@154: /* Update C * Af and C * Ab */ cannam@154: for( k = 0; k <= n + 1; k++ ) { cannam@154: tmp1 = CAf[ k ]; /* Q( -rshifts ) */ cannam@154: tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ cannam@154: CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ cannam@154: CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ cannam@154: } cannam@154: } cannam@154: cannam@154: if( reached_max_gain ) { cannam@154: for( k = 0; k < D; k++ ) { cannam@154: /* Scale coefficients */ cannam@154: A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); cannam@154: } cannam@154: /* Subtract energy of preceding samples from C0 */ cannam@154: if( rshifts > 0 ) { cannam@154: for( s = 0; s < nb_subfr; s++ ) { cannam@154: x_ptr = x + s * subfr_length; cannam@154: C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); cannam@154: } cannam@154: } else { cannam@154: for( s = 0; s < nb_subfr; s++ ) { cannam@154: x_ptr = x + s * subfr_length; cannam@154: C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts ); cannam@154: } cannam@154: } cannam@154: /* Approximate residual energy */ cannam@154: *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); cannam@154: *res_nrg_Q = -rshifts; cannam@154: } else { cannam@154: /* Return residual energy */ cannam@154: nrg = CAf[ 0 ]; /* Q( -rshifts ) */ cannam@154: tmp1 = (opus_int32)1 << 16; /* Q16 */ cannam@154: for( k = 0; k < D; k++ ) { cannam@154: Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ cannam@154: nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ cannam@154: tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ cannam@154: A_Q16[ k ] = -Atmp1; cannam@154: } cannam@154: *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ cannam@154: *res_nrg_Q = -rshifts; cannam@154: } cannam@154: }