Mercurial > hg > sv-dependency-builds
comparison src/opus-1.3/celt/mathops.c @ 154:4664ac0c1032
Add Opus sources and macOS builds
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Wed, 23 Jan 2019 13:48:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
153:84bc3a5ec321 | 154:4664ac0c1032 |
---|---|
1 /* Copyright (c) 2002-2008 Jean-Marc Valin | |
2 Copyright (c) 2007-2008 CSIRO | |
3 Copyright (c) 2007-2009 Xiph.Org Foundation | |
4 Written by Jean-Marc Valin */ | |
5 /** | |
6 @file mathops.h | |
7 @brief Various math functions | |
8 */ | |
9 /* | |
10 Redistribution and use in source and binary forms, with or without | |
11 modification, are permitted provided that the following conditions | |
12 are met: | |
13 | |
14 - Redistributions of source code must retain the above copyright | |
15 notice, this list of conditions and the following disclaimer. | |
16 | |
17 - Redistributions in binary form must reproduce the above copyright | |
18 notice, this list of conditions and the following disclaimer in the | |
19 documentation and/or other materials provided with the distribution. | |
20 | |
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | |
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 */ | |
33 | |
34 #ifdef HAVE_CONFIG_H | |
35 #include "config.h" | |
36 #endif | |
37 | |
38 #include "mathops.h" | |
39 | |
40 /*Compute floor(sqrt(_val)) with exact arithmetic. | |
41 _val must be greater than 0. | |
42 This has been tested on all possible 32-bit inputs greater than 0.*/ | |
43 unsigned isqrt32(opus_uint32 _val){ | |
44 unsigned b; | |
45 unsigned g; | |
46 int bshift; | |
47 /*Uses the second method from | |
48 http://www.azillionmonkeys.com/qed/sqroot.html | |
49 The main idea is to search for the largest binary digit b such that | |
50 (g+b)*(g+b) <= _val, and add it to the solution g.*/ | |
51 g=0; | |
52 bshift=(EC_ILOG(_val)-1)>>1; | |
53 b=1U<<bshift; | |
54 do{ | |
55 opus_uint32 t; | |
56 t=(((opus_uint32)g<<1)+b)<<bshift; | |
57 if(t<=_val){ | |
58 g+=b; | |
59 _val-=t; | |
60 } | |
61 b>>=1; | |
62 bshift--; | |
63 } | |
64 while(bshift>=0); | |
65 return g; | |
66 } | |
67 | |
68 #ifdef FIXED_POINT | |
69 | |
70 opus_val32 frac_div32(opus_val32 a, opus_val32 b) | |
71 { | |
72 opus_val16 rcp; | |
73 opus_val32 result, rem; | |
74 int shift = celt_ilog2(b)-29; | |
75 a = VSHR32(a,shift); | |
76 b = VSHR32(b,shift); | |
77 /* 16-bit reciprocal */ | |
78 rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); | |
79 result = MULT16_32_Q15(rcp, a); | |
80 rem = PSHR32(a,2)-MULT32_32_Q31(result, b); | |
81 result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); | |
82 if (result >= 536870912) /* 2^29 */ | |
83 return 2147483647; /* 2^31 - 1 */ | |
84 else if (result <= -536870912) /* -2^29 */ | |
85 return -2147483647; /* -2^31 */ | |
86 else | |
87 return SHL32(result, 2); | |
88 } | |
89 | |
90 /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ | |
91 opus_val16 celt_rsqrt_norm(opus_val32 x) | |
92 { | |
93 opus_val16 n; | |
94 opus_val16 r; | |
95 opus_val16 r2; | |
96 opus_val16 y; | |
97 /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ | |
98 n = x-32768; | |
99 /* Get a rough initial guess for the root. | |
100 The optimal minimax quadratic approximation (using relative error) is | |
101 r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). | |
102 Coefficients here, and the final result r, are Q14.*/ | |
103 r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); | |
104 /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. | |
105 We can compute the result from n and r using Q15 multiplies with some | |
106 adjustment, carefully done to avoid overflow. | |
107 Range of y is [-1564,1594]. */ | |
108 r2 = MULT16_16_Q15(r, r); | |
109 y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); | |
110 /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). | |
111 This yields the Q14 reciprocal square root of the Q16 x, with a maximum | |
112 relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a | |
113 peak absolute error of 2.26591/16384. */ | |
114 return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, | |
115 SUB16(MULT16_16_Q15(y, 12288), 16384)))); | |
116 } | |
117 | |
118 /** Sqrt approximation (QX input, QX/2 output) */ | |
119 opus_val32 celt_sqrt(opus_val32 x) | |
120 { | |
121 int k; | |
122 opus_val16 n; | |
123 opus_val32 rt; | |
124 static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; | |
125 if (x==0) | |
126 return 0; | |
127 else if (x>=1073741824) | |
128 return 32767; | |
129 k = (celt_ilog2(x)>>1)-7; | |
130 x = VSHR32(x, 2*k); | |
131 n = x-32768; | |
132 rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], | |
133 MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); | |
134 rt = VSHR32(rt,7-k); | |
135 return rt; | |
136 } | |
137 | |
138 #define L1 32767 | |
139 #define L2 -7651 | |
140 #define L3 8277 | |
141 #define L4 -626 | |
142 | |
143 static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) | |
144 { | |
145 opus_val16 x2; | |
146 | |
147 x2 = MULT16_16_P15(x,x); | |
148 return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 | |
149 )))))))); | |
150 } | |
151 | |
152 #undef L1 | |
153 #undef L2 | |
154 #undef L3 | |
155 #undef L4 | |
156 | |
157 opus_val16 celt_cos_norm(opus_val32 x) | |
158 { | |
159 x = x&0x0001ffff; | |
160 if (x>SHL32(EXTEND32(1), 16)) | |
161 x = SUB32(SHL32(EXTEND32(1), 17),x); | |
162 if (x&0x00007fff) | |
163 { | |
164 if (x<SHL32(EXTEND32(1), 15)) | |
165 { | |
166 return _celt_cos_pi_2(EXTRACT16(x)); | |
167 } else { | |
168 return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x))); | |
169 } | |
170 } else { | |
171 if (x&0x0000ffff) | |
172 return 0; | |
173 else if (x&0x0001ffff) | |
174 return -32767; | |
175 else | |
176 return 32767; | |
177 } | |
178 } | |
179 | |
180 /** Reciprocal approximation (Q15 input, Q16 output) */ | |
181 opus_val32 celt_rcp(opus_val32 x) | |
182 { | |
183 int i; | |
184 opus_val16 n; | |
185 opus_val16 r; | |
186 celt_sig_assert(x>0); | |
187 i = celt_ilog2(x); | |
188 /* n is Q15 with range [0,1). */ | |
189 n = VSHR32(x,i-15)-32768; | |
190 /* Start with a linear approximation: | |
191 r = 1.8823529411764706-0.9411764705882353*n. | |
192 The coefficients and the result are Q14 in the range [15420,30840].*/ | |
193 r = ADD16(30840, MULT16_16_Q15(-15420, n)); | |
194 /* Perform two Newton iterations: | |
195 r -= r*((r*n)-1.Q15) | |
196 = r*((r*n)+(r-1.Q15)). */ | |
197 r = SUB16(r, MULT16_16_Q15(r, | |
198 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); | |
199 /* We subtract an extra 1 in the second iteration to avoid overflow; it also | |
200 neatly compensates for truncation error in the rest of the process. */ | |
201 r = SUB16(r, ADD16(1, MULT16_16_Q15(r, | |
202 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); | |
203 /* r is now the Q15 solution to 2/(n+1), with a maximum relative error | |
204 of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute | |
205 error of 1.24665/32768. */ | |
206 return VSHR32(EXTEND32(r),i-16); | |
207 } | |
208 | |
209 #endif |