/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <arm_neon.h>
#include "SigProc_FIX.h"
#include "define.h"

#define QA                          24
#define A_LIMIT                     SILK_FIX_CONST( 0.99975, QA )

#define MUL32_FRAC_Q(a32, b32, Q)   ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))

/* The difficulty is deciding whether a 64-bit signed integer tmp64 has overflowed 32 bits,
 * since NEON has no 64-bit min, max or comparison instructions.
 * A failed idea is to compare the results of vmovn(tmp64) and vqmovn(tmp64) for equality;
 * it breaks down when tmp64 is a value such as 0xFFFFFFF980000000, where both narrow to
 * 0x80000000 even though the value does not fit in 32 bits.
 * Here we know that mult2Q >= 1, so the highest bit of tmp64 (bit 63, the sign bit) must equal bit 62.
 * Shift tmp64 left by 1 to get tmp64'. If high_half(tmp64') != 0 and high_half(tmp64') != -1,
 * then bits 31 to 63 of tmp64 cannot all equal the sign bit, and therefore tmp64 has overflowed 32 bits.
 * That is, we test whether tmp64' > 0x00000000FFFFFFFF or tmp64' < 0xFFFFFFFF00000000.
 * To save data bandwidth and instructions, we instead apply a narrowing shift right by 31 bits to get tmp32',
 * and test whether tmp32' > 0x00000000 or tmp32' < 0xFFFFFFFF (i.e., tmp32' < -1).
 */
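
/* A minimal scalar sketch of the overflow test described above, added for illustration only;
 * it is not called by the NEON code below and the helper name is hypothetical.
 * Assuming mult2Q >= 1 (so bit 62 of tmp64 equals the sign bit), the shift right by 31 mirrors
 * vshrn_n_s64( tmp64, 31 ): the narrowed result is 0 or -1 exactly when tmp64 fits in 32 bits,
 * which is the same condition the vectorized code checks through its max/min accumulation. */
static OPUS_INLINE int LPC_inv_pred_gain_overflow_sketch( opus_int64 tmp64 )
{
    const opus_int32 tmp32 = (opus_int32)silk_RSHIFT64( tmp64, 31 ); /* narrowing shift right by 31 bits */
    return ( tmp32 > 0 ) || ( tmp32 < -1 );                          /* nonzero iff tmp64 overflows 32 bits */
}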

/* Compute inverse of LPC prediction gain, and                          */
/* test if LPC coefficients are stable (all poles within unit circle)   */
static OPUS_INLINE opus_int32 LPC_inverse_pred_gain_QA_neon( /* O   Returns inverse prediction gain in energy domain, Q30    */
    opus_int32           A_QA[ SILK_MAX_ORDER_LPC ],         /* I   Prediction coefficients                                  */
    const opus_int       order                               /* I   Prediction order                                         */
)
{
    opus_int   k, n, mult2Q;
    opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
    opus_int32 max, min;
    int32x4_t  max_s32x4, min_s32x4;
    int32x2_t  max_s32x2, min_s32x2;

    max_s32x4 = vdupq_n_s32( silk_int32_MIN );
    min_s32x4 = vdupq_n_s32( silk_int32_MAX );
    invGain_Q30 = SILK_FIX_CONST( 1, 30 );
    for( k = order - 1; k > 0; k-- ) {
        int32x2_t rc_Q31_s32x2, rc_mult2_s32x2;
        int64x2_t mult2Q_s64x2;

        /* Check for stability */
        if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
            return 0;
        }

        /* Set RC equal to negated AR coef */
        rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );

        /* rc_mult1_Q30 range: [ 1 : 2^30 ] */
        rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
        silk_assert( rc_mult1_Q30 > ( 1 << 15 ) );                   /* reduce A_LIMIT if fails */
        silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );

        /* Update inverse gain */
        /* invGain_Q30 range: [ 0 : 2^30 ] */
        invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
        silk_assert( invGain_Q30 >= 0           );
        silk_assert( invGain_Q30 <= ( 1 << 30 ) );
        if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
            return 0;
        }

        /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
        mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
        rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );

        /* Update AR coefficient */
        rc_Q31_s32x2   = vdup_n_s32( rc_Q31 );
        mult2Q_s64x2   = vdupq_n_s64( -mult2Q );
        rc_mult2_s32x2 = vdup_n_s32( rc_mult2 );

        for( n = 0; n < ( ( k + 1 ) >> 1 ) - 3; n += 4 ) {
            /* We always calculate extra elements of the A_QA buffer when ( k % 4 ) != 0, to take advantage of SIMD parallelization. */
            int32x4_t tmp1_s32x4, tmp2_s32x4, t0_s32x4, t1_s32x4, s0_s32x4, s1_s32x4, t_QA0_s32x4, t_QA1_s32x4;
            int64x2_t t0_s64x2, t1_s64x2, t2_s64x2, t3_s64x2;
            tmp1_s32x4  = vld1q_s32( A_QA + n );
            tmp2_s32x4  = vld1q_s32( A_QA + k - n - 4 );
            tmp2_s32x4  = vrev64q_s32( tmp2_s32x4 );
            tmp2_s32x4  = vcombine_s32( vget_high_s32( tmp2_s32x4 ), vget_low_s32( tmp2_s32x4 ) );
            t0_s32x4    = vqrdmulhq_lane_s32( tmp2_s32x4, rc_Q31_s32x2, 0 );
            t1_s32x4    = vqrdmulhq_lane_s32( tmp1_s32x4, rc_Q31_s32x2, 0 );
            t_QA0_s32x4 = vqsubq_s32( tmp1_s32x4, t0_s32x4 );
            t_QA1_s32x4 = vqsubq_s32( tmp2_s32x4, t1_s32x4 );
            t0_s64x2    = vmull_s32( vget_low_s32 ( t_QA0_s32x4 ), rc_mult2_s32x2 );
            t1_s64x2    = vmull_s32( vget_high_s32( t_QA0_s32x4 ), rc_mult2_s32x2 );
            t2_s64x2    = vmull_s32( vget_low_s32 ( t_QA1_s32x4 ), rc_mult2_s32x2 );
            t3_s64x2    = vmull_s32( vget_high_s32( t_QA1_s32x4 ), rc_mult2_s32x2 );
            t0_s64x2    = vrshlq_s64( t0_s64x2, mult2Q_s64x2 );
            t1_s64x2    = vrshlq_s64( t1_s64x2, mult2Q_s64x2 );
            t2_s64x2    = vrshlq_s64( t2_s64x2, mult2Q_s64x2 );
            t3_s64x2    = vrshlq_s64( t3_s64x2, mult2Q_s64x2 );
            t0_s32x4    = vcombine_s32( vmovn_s64( t0_s64x2 ), vmovn_s64( t1_s64x2 ) );
            t1_s32x4    = vcombine_s32( vmovn_s64( t2_s64x2 ), vmovn_s64( t3_s64x2 ) );
            s0_s32x4    = vcombine_s32( vshrn_n_s64( t0_s64x2, 31 ), vshrn_n_s64( t1_s64x2, 31 ) );
            s1_s32x4    = vcombine_s32( vshrn_n_s64( t2_s64x2, 31 ), vshrn_n_s64( t3_s64x2, 31 ) );
            max_s32x4   = vmaxq_s32( max_s32x4, s0_s32x4 );
            min_s32x4   = vminq_s32( min_s32x4, s0_s32x4 );
            max_s32x4   = vmaxq_s32( max_s32x4, s1_s32x4 );
            min_s32x4   = vminq_s32( min_s32x4, s1_s32x4 );
            t1_s32x4    = vrev64q_s32( t1_s32x4 );
            t1_s32x4    = vcombine_s32( vget_high_s32( t1_s32x4 ), vget_low_s32( t1_s32x4 ) );
            vst1q_s32( A_QA + n,         t0_s32x4 );
            vst1q_s32( A_QA + k - n - 4, t1_s32x4 );
        }
        for( ; n < (k + 1) >> 1; n++ ) {
            opus_int64 tmp64;
            tmp1 = A_QA[ n ];
            tmp2 = A_QA[ k - n - 1 ];
            tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
                  MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
            if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
               return 0;
            }
            A_QA[ n ] = ( opus_int32 )tmp64;
            tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
                  MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
            if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
               return 0;
            }
            A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
        }
    }

    /* Check for stability */
    if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
        return 0;
    }

    max_s32x2 = vmax_s32( vget_low_s32( max_s32x4 ), vget_high_s32( max_s32x4 ) );
    min_s32x2 = vmin_s32( vget_low_s32( min_s32x4 ), vget_high_s32( min_s32x4 ) );
    max_s32x2 = vmax_s32( max_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( max_s32x2 ), 32 ) ) );
    min_s32x2 = vmin_s32( min_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( min_s32x2 ), 32 ) ) );
    max = vget_lane_s32( max_s32x2, 0 );
    min = vget_lane_s32( min_s32x2, 0 );
    if( ( max > 0 ) || ( min < -1 ) ) {
        return 0;
    }

    /* Set RC equal to negated AR coef */
    rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );

    /* Range: [ 1 : 2^30 ] */
    rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );

    /* Update inverse gain */
    /* Range: [ 0 : 2^30 ] */
    invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
    silk_assert( invGain_Q30 >= 0           );
    silk_assert( invGain_Q30 <= ( 1 << 30 ) );
    if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
        return 0;
    }

    return invGain_Q30;
}

/* For input in Q12 domain */
opus_int32 silk_LPC_inverse_pred_gain_neon(         /* O   Returns inverse prediction gain in energy domain, Q30        */
    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
    const opus_int              order               /* I   Prediction order                                             */
)
{
#ifdef OPUS_CHECK_ASM
    const opus_int32 invGain_Q30_c = silk_LPC_inverse_pred_gain_c( A_Q12, order );
#endif

    opus_int32 invGain_Q30;
    if( ( SILK_MAX_ORDER_LPC != 24 ) || ( order & 1 )) {
        invGain_Q30 = silk_LPC_inverse_pred_gain_c( A_Q12, order );
    }
    else {
        opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
        opus_int32 DC_resp;
        int16x8_t  t0_s16x8, t1_s16x8, t2_s16x8;
        int32x4_t  t0_s32x4;
        const opus_int leftover = order & 7;

        /* Increase Q domain of the AR coefficients */
        t0_s16x8 = vld1q_s16( A_Q12 +  0 );
        t1_s16x8 = vld1q_s16( A_Q12 +  8 );
        t2_s16x8 = vld1q_s16( A_Q12 + 16 );
        t0_s32x4 = vpaddlq_s16( t0_s16x8 );

        switch( order - leftover )
        {
        case 24:
            t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 );
            /* FALLTHROUGH */

        case 16:
            t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 );
            vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
            vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
            /* FALLTHROUGH */

        case 8:
        {
            const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) );
            const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 );
            DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 );
            vst1q_s32( Atmp_QA +  8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
            vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
        }
        break;

        default:
            DC_resp = 0;
            break;
        }
        A_Q12 += order - leftover;

        switch( leftover )
        {
        case 6:
            DC_resp += (opus_int32)A_Q12[ 5 ];
            DC_resp += (opus_int32)A_Q12[ 4 ];
            /* FALLTHROUGH */

        case 4:
            DC_resp += (opus_int32)A_Q12[ 3 ];
            DC_resp += (opus_int32)A_Q12[ 2 ];
            /* FALLTHROUGH */

        case 2:
            DC_resp += (opus_int32)A_Q12[ 1 ];
            DC_resp += (opus_int32)A_Q12[ 0 ];
            /* FALLTHROUGH */

        default:
            break;
        }

        /* If the DC is unstable, we don't even need to do the full calculations */
        if( DC_resp >= 4096 ) {
            invGain_Q30 = 0;
        } else {
            vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) );
            vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) );
            invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order );
        }
    }

#ifdef OPUS_CHECK_ASM
    silk_assert( invGain_Q30_c == invGain_Q30 );
#endif

    return invGain_Q30;
}
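
/* Hypothetical usage sketch, added for illustration and not part of the original file: the return
 * value is the inverse prediction gain in Q30, so 0 signals that the coefficients were judged
 * unstable (or the prediction gain exceeded the allowed maximum), and any nonzero value means the
 * filter passed the stability checks. The helper name and the fixed order are assumptions; order
 * must be even (with SILK_MAX_ORDER_LPC == 24) for the NEON path to be taken, and the coefficient
 * buffer should hold SILK_MAX_ORDER_LPC entries, since the NEON path above loads 24 coefficients
 * regardless of order. */
static OPUS_INLINE int LPC_coefs_are_stable_example( const opus_int16 A_Q12[ SILK_MAX_ORDER_LPC ] )
{
    const opus_int32 invGain_Q30 = silk_LPC_inverse_pred_gain_neon( A_Q12, 16 );
    return invGain_Q30 != 0;    /* nonzero inverse gain implies a usable, stable predictor */
}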