annotate src/opus-1.3/celt/mathops.c @ 69:7aeed7906520

Add Opus sources and macOS builds
author Chris Cannam
date Wed, 23 Jan 2019 13:48:08 +0000
parents
children
rev   line source
Chris@69 1 /* Copyright (c) 2002-2008 Jean-Marc Valin
Chris@69 2 Copyright (c) 2007-2008 CSIRO
Chris@69 3 Copyright (c) 2007-2009 Xiph.Org Foundation
Chris@69 4 Written by Jean-Marc Valin */
Chris@69 5 /**
Chris@69 6 @file mathops.h
Chris@69 7 @brief Various math functions
Chris@69 8 */
Chris@69 9 /*
Chris@69 10 Redistribution and use in source and binary forms, with or without
Chris@69 11 modification, are permitted provided that the following conditions
Chris@69 12 are met:
Chris@69 13
Chris@69 14 - Redistributions of source code must retain the above copyright
Chris@69 15 notice, this list of conditions and the following disclaimer.
Chris@69 16
Chris@69 17 - Redistributions in binary form must reproduce the above copyright
Chris@69 18 notice, this list of conditions and the following disclaimer in the
Chris@69 19 documentation and/or other materials provided with the distribution.
Chris@69 20
Chris@69 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Chris@69 22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Chris@69 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Chris@69 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
Chris@69 25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Chris@69 26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
Chris@69 27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
Chris@69 28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
Chris@69 29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
Chris@69 30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
Chris@69 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@69 32 */
Chris@69 33
Chris@69 34 #ifdef HAVE_CONFIG_H
Chris@69 35 #include "config.h"
Chris@69 36 #endif
Chris@69 37
Chris@69 38 #include "mathops.h"
Chris@69 39
Chris@69 40 /*Compute floor(sqrt(_val)) with exact arithmetic.
Chris@69 41 _val must be greater than 0.
Chris@69 42 This has been tested on all possible 32-bit inputs greater than 0.*/
Chris@69 43 unsigned isqrt32(opus_uint32 _val){
Chris@69 44 unsigned b;
Chris@69 45 unsigned g;
Chris@69 46 int bshift;
Chris@69 47 /*Uses the second method from
Chris@69 48 http://www.azillionmonkeys.com/qed/sqroot.html
Chris@69 49 The main idea is to search for the largest binary digit b such that
Chris@69 50 (g+b)*(g+b) <= _val, and add it to the solution g.*/
Chris@69 51 g=0;
Chris@69 52 bshift=(EC_ILOG(_val)-1)>>1;
Chris@69 53 b=1U<<bshift;
Chris@69 54 do{
Chris@69 55 opus_uint32 t;
Chris@69 56 t=(((opus_uint32)g<<1)+b)<<bshift;
Chris@69 57 if(t<=_val){
Chris@69 58 g+=b;
Chris@69 59 _val-=t;
Chris@69 60 }
Chris@69 61 b>>=1;
Chris@69 62 bshift--;
Chris@69 63 }
Chris@69 64 while(bshift>=0);
Chris@69 65 return g;
Chris@69 66 }
Chris@69 67
Chris@69 68 #ifdef FIXED_POINT
Chris@69 69
Chris@69 70 opus_val32 frac_div32(opus_val32 a, opus_val32 b)
Chris@69 71 {
Chris@69 72 opus_val16 rcp;
Chris@69 73 opus_val32 result, rem;
Chris@69 74 int shift = celt_ilog2(b)-29;
Chris@69 75 a = VSHR32(a,shift);
Chris@69 76 b = VSHR32(b,shift);
Chris@69 77 /* 16-bit reciprocal */
Chris@69 78 rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
Chris@69 79 result = MULT16_32_Q15(rcp, a);
Chris@69 80 rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
Chris@69 81 result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
Chris@69 82 if (result >= 536870912) /* 2^29 */
Chris@69 83 return 2147483647; /* 2^31 - 1 */
Chris@69 84 else if (result <= -536870912) /* -2^29 */
Chris@69 85 return -2147483647; /* -2^31 */
Chris@69 86 else
Chris@69 87 return SHL32(result, 2);
Chris@69 88 }
Chris@69 89
Chris@69 90 /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
Chris@69 91 opus_val16 celt_rsqrt_norm(opus_val32 x)
Chris@69 92 {
Chris@69 93 opus_val16 n;
Chris@69 94 opus_val16 r;
Chris@69 95 opus_val16 r2;
Chris@69 96 opus_val16 y;
Chris@69 97 /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
Chris@69 98 n = x-32768;
Chris@69 99 /* Get a rough initial guess for the root.
Chris@69 100 The optimal minimax quadratic approximation (using relative error) is
Chris@69 101 r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
Chris@69 102 Coefficients here, and the final result r, are Q14.*/
Chris@69 103 r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
Chris@69 104 /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
Chris@69 105 We can compute the result from n and r using Q15 multiplies with some
Chris@69 106 adjustment, carefully done to avoid overflow.
Chris@69 107 Range of y is [-1564,1594]. */
Chris@69 108 r2 = MULT16_16_Q15(r, r);
Chris@69 109 y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
Chris@69 110 /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
Chris@69 111 This yields the Q14 reciprocal square root of the Q16 x, with a maximum
Chris@69 112 relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
Chris@69 113 peak absolute error of 2.26591/16384. */
Chris@69 114 return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
Chris@69 115 SUB16(MULT16_16_Q15(y, 12288), 16384))));
Chris@69 116 }
Chris@69 117
Chris@69 118 /** Sqrt approximation (QX input, QX/2 output) */
Chris@69 119 opus_val32 celt_sqrt(opus_val32 x)
Chris@69 120 {
Chris@69 121 int k;
Chris@69 122 opus_val16 n;
Chris@69 123 opus_val32 rt;
Chris@69 124 static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
Chris@69 125 if (x==0)
Chris@69 126 return 0;
Chris@69 127 else if (x>=1073741824)
Chris@69 128 return 32767;
Chris@69 129 k = (celt_ilog2(x)>>1)-7;
Chris@69 130 x = VSHR32(x, 2*k);
Chris@69 131 n = x-32768;
Chris@69 132 rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
Chris@69 133 MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
Chris@69 134 rt = VSHR32(rt,7-k);
Chris@69 135 return rt;
Chris@69 136 }
Chris@69 137
Chris@69 138 #define L1 32767
Chris@69 139 #define L2 -7651
Chris@69 140 #define L3 8277
Chris@69 141 #define L4 -626
Chris@69 142
Chris@69 143 static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
Chris@69 144 {
Chris@69 145 opus_val16 x2;
Chris@69 146
Chris@69 147 x2 = MULT16_16_P15(x,x);
Chris@69 148 return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
Chris@69 149 ))))))));
Chris@69 150 }
Chris@69 151
Chris@69 152 #undef L1
Chris@69 153 #undef L2
Chris@69 154 #undef L3
Chris@69 155 #undef L4
Chris@69 156
Chris@69 157 opus_val16 celt_cos_norm(opus_val32 x)
Chris@69 158 {
Chris@69 159 x = x&0x0001ffff;
Chris@69 160 if (x>SHL32(EXTEND32(1), 16))
Chris@69 161 x = SUB32(SHL32(EXTEND32(1), 17),x);
Chris@69 162 if (x&0x00007fff)
Chris@69 163 {
Chris@69 164 if (x<SHL32(EXTEND32(1), 15))
Chris@69 165 {
Chris@69 166 return _celt_cos_pi_2(EXTRACT16(x));
Chris@69 167 } else {
Chris@69 168 return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
Chris@69 169 }
Chris@69 170 } else {
Chris@69 171 if (x&0x0000ffff)
Chris@69 172 return 0;
Chris@69 173 else if (x&0x0001ffff)
Chris@69 174 return -32767;
Chris@69 175 else
Chris@69 176 return 32767;
Chris@69 177 }
Chris@69 178 }
Chris@69 179
Chris@69 180 /** Reciprocal approximation (Q15 input, Q16 output) */
Chris@69 181 opus_val32 celt_rcp(opus_val32 x)
Chris@69 182 {
Chris@69 183 int i;
Chris@69 184 opus_val16 n;
Chris@69 185 opus_val16 r;
Chris@69 186 celt_sig_assert(x>0);
Chris@69 187 i = celt_ilog2(x);
Chris@69 188 /* n is Q15 with range [0,1). */
Chris@69 189 n = VSHR32(x,i-15)-32768;
Chris@69 190 /* Start with a linear approximation:
Chris@69 191 r = 1.8823529411764706-0.9411764705882353*n.
Chris@69 192 The coefficients and the result are Q14 in the range [15420,30840].*/
Chris@69 193 r = ADD16(30840, MULT16_16_Q15(-15420, n));
Chris@69 194 /* Perform two Newton iterations:
Chris@69 195 r -= r*((r*n)-1.Q15)
Chris@69 196 = r*((r*n)+(r-1.Q15)). */
Chris@69 197 r = SUB16(r, MULT16_16_Q15(r,
Chris@69 198 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
Chris@69 199 /* We subtract an extra 1 in the second iteration to avoid overflow; it also
Chris@69 200 neatly compensates for truncation error in the rest of the process. */
Chris@69 201 r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
Chris@69 202 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
Chris@69 203 /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
Chris@69 204 of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
Chris@69 205 error of 1.24665/32768. */
Chris@69 206 return VSHR32(EXTEND32(r),i-16);
Chris@69 207 }
Chris@69 208
Chris@69 209 #endif