cannam@154: /* Copyright (c) 2002-2008 Jean-Marc Valin cannam@154: Copyright (c) 2007-2008 CSIRO cannam@154: Copyright (c) 2007-2009 Xiph.Org Foundation cannam@154: Written by Jean-Marc Valin */ cannam@154: /** cannam@154: @file mathops.h cannam@154: @brief Various math functions cannam@154: */ cannam@154: /* cannam@154: Redistribution and use in source and binary forms, with or without cannam@154: modification, are permitted provided that the following conditions cannam@154: are met: cannam@154: cannam@154: - Redistributions of source code must retain the above copyright cannam@154: notice, this list of conditions and the following disclaimer. cannam@154: cannam@154: - Redistributions in binary form must reproduce the above copyright cannam@154: notice, this list of conditions and the following disclaimer in the cannam@154: documentation and/or other materials provided with the distribution. cannam@154: cannam@154: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS cannam@154: ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT cannam@154: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR cannam@154: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER cannam@154: OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, cannam@154: EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, cannam@154: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR cannam@154: PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF cannam@154: LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING cannam@154: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS cannam@154: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cannam@154: */ cannam@154: cannam@154: #ifdef HAVE_CONFIG_H cannam@154: #include "config.h" cannam@154: #endif cannam@154: cannam@154: #include "mathops.h" cannam@154: cannam@154: /*Compute floor(sqrt(_val)) with exact arithmetic. cannam@154: _val must be greater than 0. cannam@154: This has been tested on all possible 32-bit inputs greater than 0.*/ cannam@154: unsigned isqrt32(opus_uint32 _val){ cannam@154: unsigned b; cannam@154: unsigned g; cannam@154: int bshift; cannam@154: /*Uses the second method from cannam@154: http://www.azillionmonkeys.com/qed/sqroot.html cannam@154: The main idea is to search for the largest binary digit b such that cannam@154: (g+b)*(g+b) <= _val, and add it to the solution g.*/ cannam@154: g=0; cannam@154: bshift=(EC_ILOG(_val)-1)>>1; cannam@154: b=1U<>=1; cannam@154: bshift--; cannam@154: } cannam@154: while(bshift>=0); cannam@154: return g; cannam@154: } cannam@154: cannam@154: #ifdef FIXED_POINT cannam@154: cannam@154: opus_val32 frac_div32(opus_val32 a, opus_val32 b) cannam@154: { cannam@154: opus_val16 rcp; cannam@154: opus_val32 result, rem; cannam@154: int shift = celt_ilog2(b)-29; cannam@154: a = VSHR32(a,shift); cannam@154: b = VSHR32(b,shift); cannam@154: /* 16-bit reciprocal */ cannam@154: rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); cannam@154: result = MULT16_32_Q15(rcp, a); cannam@154: rem = PSHR32(a,2)-MULT32_32_Q31(result, b); cannam@154: result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); cannam@154: if (result >= 536870912) /* 2^29 */ cannam@154: return 2147483647; /* 2^31 - 1 */ cannam@154: else if (result <= -536870912) /* -2^29 */ cannam@154: return -2147483647; /* -2^31 */ cannam@154: else cannam@154: return SHL32(result, 2); cannam@154: } cannam@154: cannam@154: /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ cannam@154: opus_val16 celt_rsqrt_norm(opus_val32 x) cannam@154: { cannam@154: opus_val16 n; cannam@154: opus_val16 r; cannam@154: opus_val16 r2; cannam@154: opus_val16 y; cannam@154: /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ cannam@154: n = x-32768; cannam@154: /* Get a rough initial guess for the root. cannam@154: The optimal minimax quadratic approximation (using relative error) is cannam@154: r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). cannam@154: Coefficients here, and the final result r, are Q14.*/ cannam@154: r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); cannam@154: /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. cannam@154: We can compute the result from n and r using Q15 multiplies with some cannam@154: adjustment, carefully done to avoid overflow. cannam@154: Range of y is [-1564,1594]. */ cannam@154: r2 = MULT16_16_Q15(r, r); cannam@154: y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); cannam@154: /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). cannam@154: This yields the Q14 reciprocal square root of the Q16 x, with a maximum cannam@154: relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a cannam@154: peak absolute error of 2.26591/16384. */ cannam@154: return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, cannam@154: SUB16(MULT16_16_Q15(y, 12288), 16384)))); cannam@154: } cannam@154: cannam@154: /** Sqrt approximation (QX input, QX/2 output) */ cannam@154: opus_val32 celt_sqrt(opus_val32 x) cannam@154: { cannam@154: int k; cannam@154: opus_val16 n; cannam@154: opus_val32 rt; cannam@154: static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; cannam@154: if (x==0) cannam@154: return 0; cannam@154: else if (x>=1073741824) cannam@154: return 32767; cannam@154: k = (celt_ilog2(x)>>1)-7; cannam@154: x = VSHR32(x, 2*k); cannam@154: n = x-32768; cannam@154: rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], cannam@154: MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); cannam@154: rt = VSHR32(rt,7-k); cannam@154: return rt; cannam@154: } cannam@154: cannam@154: #define L1 32767 cannam@154: #define L2 -7651 cannam@154: #define L3 8277 cannam@154: #define L4 -626 cannam@154: cannam@154: static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) cannam@154: { cannam@154: opus_val16 x2; cannam@154: cannam@154: x2 = MULT16_16_P15(x,x); cannam@154: return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 cannam@154: )))))))); cannam@154: } cannam@154: cannam@154: #undef L1 cannam@154: #undef L2 cannam@154: #undef L3 cannam@154: #undef L4 cannam@154: cannam@154: opus_val16 celt_cos_norm(opus_val32 x) cannam@154: { cannam@154: x = x&0x0001ffff; cannam@154: if (x>SHL32(EXTEND32(1), 16)) cannam@154: x = SUB32(SHL32(EXTEND32(1), 17),x); cannam@154: if (x&0x00007fff) cannam@154: { cannam@154: if (x0); cannam@154: i = celt_ilog2(x); cannam@154: /* n is Q15 with range [0,1). */ cannam@154: n = VSHR32(x,i-15)-32768; cannam@154: /* Start with a linear approximation: cannam@154: r = 1.8823529411764706-0.9411764705882353*n. cannam@154: The coefficients and the result are Q14 in the range [15420,30840].*/ cannam@154: r = ADD16(30840, MULT16_16_Q15(-15420, n)); cannam@154: /* Perform two Newton iterations: cannam@154: r -= r*((r*n)-1.Q15) cannam@154: = r*((r*n)+(r-1.Q15)). */ cannam@154: r = SUB16(r, MULT16_16_Q15(r, cannam@154: ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); cannam@154: /* We subtract an extra 1 in the second iteration to avoid overflow; it also cannam@154: neatly compensates for truncation error in the rest of the process. */ cannam@154: r = SUB16(r, ADD16(1, MULT16_16_Q15(r, cannam@154: ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); cannam@154: /* r is now the Q15 solution to 2/(n+1), with a maximum relative error cannam@154: of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute cannam@154: error of 1.24665/32768. */ cannam@154: return VSHR32(EXTEND32(r),i-16); cannam@154: } cannam@154: cannam@154: #endif