Chris@69
|
1 /* Copyright (c) 2002-2008 Jean-Marc Valin
|
Chris@69
|
2 Copyright (c) 2007-2008 CSIRO
|
Chris@69
|
3 Copyright (c) 2007-2009 Xiph.Org Foundation
|
Chris@69
|
4 Written by Jean-Marc Valin */
|
Chris@69
|
5 /**
|
Chris@69
|
6 @file mathops.h
|
Chris@69
|
7 @brief Various math functions
|
Chris@69
|
8 */
|
Chris@69
|
9 /*
|
Chris@69
|
10 Redistribution and use in source and binary forms, with or without
|
Chris@69
|
11 modification, are permitted provided that the following conditions
|
Chris@69
|
12 are met:
|
Chris@69
|
13
|
Chris@69
|
14 - Redistributions of source code must retain the above copyright
|
Chris@69
|
15 notice, this list of conditions and the following disclaimer.
|
Chris@69
|
16
|
Chris@69
|
17 - Redistributions in binary form must reproduce the above copyright
|
Chris@69
|
18 notice, this list of conditions and the following disclaimer in the
|
Chris@69
|
19 documentation and/or other materials provided with the distribution.
|
Chris@69
|
20
|
Chris@69
|
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
Chris@69
|
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
Chris@69
|
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
Chris@69
|
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
Chris@69
|
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
Chris@69
|
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
Chris@69
|
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
Chris@69
|
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
Chris@69
|
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
Chris@69
|
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
Chris@69
|
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
Chris@69
|
32 */
|
Chris@69
|
33
|
Chris@69
|
34 #ifdef HAVE_CONFIG_H
|
Chris@69
|
35 #include "config.h"
|
Chris@69
|
36 #endif
|
Chris@69
|
37
|
Chris@69
|
38 #include "mathops.h"
|
Chris@69
|
39
|
Chris@69
|
40 /*Compute floor(sqrt(_val)) with exact arithmetic.
|
Chris@69
|
41 _val must be greater than 0.
|
Chris@69
|
42 This has been tested on all possible 32-bit inputs greater than 0.*/
|
Chris@69
|
43 unsigned isqrt32(opus_uint32 _val){
|
Chris@69
|
44 unsigned b;
|
Chris@69
|
45 unsigned g;
|
Chris@69
|
46 int bshift;
|
Chris@69
|
47 /*Uses the second method from
|
Chris@69
|
48 http://www.azillionmonkeys.com/qed/sqroot.html
|
Chris@69
|
49 The main idea is to search for the largest binary digit b such that
|
Chris@69
|
50 (g+b)*(g+b) <= _val, and add it to the solution g.*/
|
Chris@69
|
51 g=0;
|
Chris@69
|
52 bshift=(EC_ILOG(_val)-1)>>1;
|
Chris@69
|
53 b=1U<<bshift;
|
Chris@69
|
54 do{
|
Chris@69
|
55 opus_uint32 t;
|
Chris@69
|
56 t=(((opus_uint32)g<<1)+b)<<bshift;
|
Chris@69
|
57 if(t<=_val){
|
Chris@69
|
58 g+=b;
|
Chris@69
|
59 _val-=t;
|
Chris@69
|
60 }
|
Chris@69
|
61 b>>=1;
|
Chris@69
|
62 bshift--;
|
Chris@69
|
63 }
|
Chris@69
|
64 while(bshift>=0);
|
Chris@69
|
65 return g;
|
Chris@69
|
66 }
|
Chris@69
|
67
|
Chris@69
|
68 #ifdef FIXED_POINT
|
Chris@69
|
69
|
Chris@69
|
70 opus_val32 frac_div32(opus_val32 a, opus_val32 b)
|
Chris@69
|
71 {
|
Chris@69
|
72 opus_val16 rcp;
|
Chris@69
|
73 opus_val32 result, rem;
|
Chris@69
|
74 int shift = celt_ilog2(b)-29;
|
Chris@69
|
75 a = VSHR32(a,shift);
|
Chris@69
|
76 b = VSHR32(b,shift);
|
Chris@69
|
77 /* 16-bit reciprocal */
|
Chris@69
|
78 rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
|
Chris@69
|
79 result = MULT16_32_Q15(rcp, a);
|
Chris@69
|
80 rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
|
Chris@69
|
81 result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
|
Chris@69
|
82 if (result >= 536870912) /* 2^29 */
|
Chris@69
|
83 return 2147483647; /* 2^31 - 1 */
|
Chris@69
|
84 else if (result <= -536870912) /* -2^29 */
|
Chris@69
|
85 return -2147483647; /* -2^31 */
|
Chris@69
|
86 else
|
Chris@69
|
87 return SHL32(result, 2);
|
Chris@69
|
88 }
|
Chris@69
|
89
|
Chris@69
|
90 /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
|
Chris@69
|
91 opus_val16 celt_rsqrt_norm(opus_val32 x)
|
Chris@69
|
92 {
|
Chris@69
|
93 opus_val16 n;
|
Chris@69
|
94 opus_val16 r;
|
Chris@69
|
95 opus_val16 r2;
|
Chris@69
|
96 opus_val16 y;
|
Chris@69
|
97 /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
|
Chris@69
|
98 n = x-32768;
|
Chris@69
|
99 /* Get a rough initial guess for the root.
|
Chris@69
|
100 The optimal minimax quadratic approximation (using relative error) is
|
Chris@69
|
101 r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
|
Chris@69
|
102 Coefficients here, and the final result r, are Q14.*/
|
Chris@69
|
103 r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
|
Chris@69
|
104 /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
|
Chris@69
|
105 We can compute the result from n and r using Q15 multiplies with some
|
Chris@69
|
106 adjustment, carefully done to avoid overflow.
|
Chris@69
|
107 Range of y is [-1564,1594]. */
|
Chris@69
|
108 r2 = MULT16_16_Q15(r, r);
|
Chris@69
|
109 y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
|
Chris@69
|
110 /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
|
Chris@69
|
111 This yields the Q14 reciprocal square root of the Q16 x, with a maximum
|
Chris@69
|
112 relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
|
Chris@69
|
113 peak absolute error of 2.26591/16384. */
|
Chris@69
|
114 return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
|
Chris@69
|
115 SUB16(MULT16_16_Q15(y, 12288), 16384))));
|
Chris@69
|
116 }
|
Chris@69
|
117
|
Chris@69
|
118 /** Sqrt approximation (QX input, QX/2 output) */
|
Chris@69
|
119 opus_val32 celt_sqrt(opus_val32 x)
|
Chris@69
|
120 {
|
Chris@69
|
121 int k;
|
Chris@69
|
122 opus_val16 n;
|
Chris@69
|
123 opus_val32 rt;
|
Chris@69
|
124 static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
|
Chris@69
|
125 if (x==0)
|
Chris@69
|
126 return 0;
|
Chris@69
|
127 else if (x>=1073741824)
|
Chris@69
|
128 return 32767;
|
Chris@69
|
129 k = (celt_ilog2(x)>>1)-7;
|
Chris@69
|
130 x = VSHR32(x, 2*k);
|
Chris@69
|
131 n = x-32768;
|
Chris@69
|
132 rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
Chris@69
|
133 MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
Chris@69
|
134 rt = VSHR32(rt,7-k);
|
Chris@69
|
135 return rt;
|
Chris@69
|
136 }
|
Chris@69
|
137
|
Chris@69
|
138 #define L1 32767
|
Chris@69
|
139 #define L2 -7651
|
Chris@69
|
140 #define L3 8277
|
Chris@69
|
141 #define L4 -626
|
Chris@69
|
142
|
Chris@69
|
143 static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
|
Chris@69
|
144 {
|
Chris@69
|
145 opus_val16 x2;
|
Chris@69
|
146
|
Chris@69
|
147 x2 = MULT16_16_P15(x,x);
|
Chris@69
|
148 return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
|
Chris@69
|
149 ))))))));
|
Chris@69
|
150 }
|
Chris@69
|
151
|
Chris@69
|
152 #undef L1
|
Chris@69
|
153 #undef L2
|
Chris@69
|
154 #undef L3
|
Chris@69
|
155 #undef L4
|
Chris@69
|
156
|
Chris@69
|
157 opus_val16 celt_cos_norm(opus_val32 x)
|
Chris@69
|
158 {
|
Chris@69
|
159 x = x&0x0001ffff;
|
Chris@69
|
160 if (x>SHL32(EXTEND32(1), 16))
|
Chris@69
|
161 x = SUB32(SHL32(EXTEND32(1), 17),x);
|
Chris@69
|
162 if (x&0x00007fff)
|
Chris@69
|
163 {
|
Chris@69
|
164 if (x<SHL32(EXTEND32(1), 15))
|
Chris@69
|
165 {
|
Chris@69
|
166 return _celt_cos_pi_2(EXTRACT16(x));
|
Chris@69
|
167 } else {
|
Chris@69
|
168 return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
|
Chris@69
|
169 }
|
Chris@69
|
170 } else {
|
Chris@69
|
171 if (x&0x0000ffff)
|
Chris@69
|
172 return 0;
|
Chris@69
|
173 else if (x&0x0001ffff)
|
Chris@69
|
174 return -32767;
|
Chris@69
|
175 else
|
Chris@69
|
176 return 32767;
|
Chris@69
|
177 }
|
Chris@69
|
178 }
|
Chris@69
|
179
|
Chris@69
|
180 /** Reciprocal approximation (Q15 input, Q16 output) */
|
Chris@69
|
181 opus_val32 celt_rcp(opus_val32 x)
|
Chris@69
|
182 {
|
Chris@69
|
183 int i;
|
Chris@69
|
184 opus_val16 n;
|
Chris@69
|
185 opus_val16 r;
|
Chris@69
|
186 celt_sig_assert(x>0);
|
Chris@69
|
187 i = celt_ilog2(x);
|
Chris@69
|
188 /* n is Q15 with range [0,1). */
|
Chris@69
|
189 n = VSHR32(x,i-15)-32768;
|
Chris@69
|
190 /* Start with a linear approximation:
|
Chris@69
|
191 r = 1.8823529411764706-0.9411764705882353*n.
|
Chris@69
|
192 The coefficients and the result are Q14 in the range [15420,30840].*/
|
Chris@69
|
193 r = ADD16(30840, MULT16_16_Q15(-15420, n));
|
Chris@69
|
194 /* Perform two Newton iterations:
|
Chris@69
|
195 r -= r*((r*n)-1.Q15)
|
Chris@69
|
196 = r*((r*n)+(r-1.Q15)). */
|
Chris@69
|
197 r = SUB16(r, MULT16_16_Q15(r,
|
Chris@69
|
198 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
|
Chris@69
|
199 /* We subtract an extra 1 in the second iteration to avoid overflow; it also
|
Chris@69
|
200 neatly compensates for truncation error in the rest of the process. */
|
Chris@69
|
201 r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
|
Chris@69
|
202 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
|
Chris@69
|
203 /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
|
Chris@69
|
204 of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
|
Chris@69
|
205 error of 1.24665/32768. */
|
Chris@69
|
206 return VSHR32(EXTEND32(r),i-16);
|
Chris@69
|
207 }
|
Chris@69
|
208
|
Chris@69
|
209 #endif
|