Mercurial > hg > silvet
comparison bqvec/src/VectorOpsComplex.cpp @ 366:5d0a2ebb4d17
Bring dependent libraries in to repo
author | Chris Cannam |
---|---|
date | Fri, 24 Jun 2016 14:47:45 +0100 |
parents | |
children | af71cbdab621 |
comparison
equal
deleted
inserted
replaced
365:112766f4c34b | 366:5d0a2ebb4d17 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 bqvec | |
5 | |
6 A small library for vector arithmetic and allocation in C++ using | |
7 raw C pointer arrays. | |
8 | |
9 Copyright 2007-2015 Particular Programs Ltd. | |
10 | |
11 Permission is hereby granted, free of charge, to any person | |
12 obtaining a copy of this software and associated documentation | |
13 files (the "Software"), to deal in the Software without | |
14 restriction, including without limitation the rights to use, copy, | |
15 modify, merge, publish, distribute, sublicense, and/or sell copies | |
16 of the Software, and to permit persons to whom the Software is | |
17 furnished to do so, subject to the following conditions: | |
18 | |
19 The above copyright notice and this permission notice shall be | |
20 included in all copies or substantial portions of the Software. | |
21 | |
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR | |
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF | |
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
29 | |
30 Except as contained in this notice, the names of Chris Cannam and | |
31 Particular Programs Ltd shall not be used in advertising or | |
32 otherwise to promote the sale, use or other dealings in this | |
33 Software without prior written authorization. | |
34 */ | |
35 | |
36 #include "bqvec/VectorOpsComplex.h" | |
37 | |
38 #include <cassert> | |
39 | |
40 #ifdef __MSVC__ | |
41 #include <malloc.h> | |
42 #define alloca _alloca | |
43 #endif | |
44 | |
45 #if defined USE_POMMIER_MATHFUN | |
46 #if defined __ARMEL__ | |
47 #include "pommier/neon_mathfun.h" | |
48 #else | |
49 #include "pommier/sse_mathfun.h" | |
50 #endif | |
51 #endif | |
52 | |
53 #ifdef __MSVC__ | |
54 #include <malloc.h> | |
55 #define alloca _alloca | |
56 #endif | |
57 | |
58 namespace breakfastquay { | |
59 | |
60 #ifdef USE_APPROXIMATE_ATAN2 | |
61 float approximate_atan2f(float real, float imag) | |
62 { | |
63 static const float pi = M_PI; | |
64 static const float pi2 = M_PI / 2; | |
65 | |
66 float atan; | |
67 | |
68 if (real == 0.f) { | |
69 | |
70 if (imag > 0.0f) atan = pi2; | |
71 else if (imag == 0.0f) atan = 0.0f; | |
72 else atan = -pi2; | |
73 | |
74 } else { | |
75 | |
76 float z = imag/real; | |
77 | |
78 if (fabsf(z) < 1.f) { | |
79 atan = z / (1.f + 0.28f * z * z); | |
80 if (real < 0.f) { | |
81 if (imag < 0.f) atan -= pi; | |
82 else atan += pi; | |
83 } | |
84 } else { | |
85 atan = pi2 - z / (z * z + 0.28f); | |
86 if (imag < 0.f) atan -= pi; | |
87 } | |
88 } | |
89 } | |
90 #endif | |
91 | |
92 #if defined USE_POMMIER_MATHFUN | |
93 | |
94 #ifdef __ARMEL__ | |
95 typedef union { | |
96 float f[4]; | |
97 int i[4]; | |
98 v4sf v; | |
99 } V4SF; | |
100 #else | |
101 typedef ALIGN16_BEG union { | |
102 float f[4]; | |
103 int i[4]; | |
104 v4sf v; | |
105 } ALIGN16_END V4SF; | |
106 #endif | |
107 | |
108 void | |
109 v_polar_to_cartesian_pommier(float *const BQ_R__ real, | |
110 float *const BQ_R__ imag, | |
111 const float *const BQ_R__ mag, | |
112 const float *const BQ_R__ phase, | |
113 const int count) | |
114 { | |
115 int idx = 0, tidx = 0; | |
116 int i = 0; | |
117 | |
118 for (int i = 0; i + 4 < count; i += 4) { | |
119 | |
120 V4SF fmag, fphase, fre, fim; | |
121 | |
122 for (int j = 0; j < 3; ++j) { | |
123 fmag.f[j] = mag[idx]; | |
124 fphase.f[j] = phase[idx++]; | |
125 } | |
126 | |
127 sincos_ps(fphase.v, &fim.v, &fre.v); | |
128 | |
129 for (int j = 0; j < 3; ++j) { | |
130 real[tidx] = fre.f[j] * fmag.f[j]; | |
131 imag[tidx++] = fim.f[j] * fmag.f[j]; | |
132 } | |
133 } | |
134 | |
135 while (i < count) { | |
136 float re, im; | |
137 c_phasor(&re, &im, phase[i]); | |
138 real[tidx] = re * mag[i]; | |
139 imag[tidx++] = im * mag[i]; | |
140 ++i; | |
141 } | |
142 } | |
143 | |
144 void | |
145 v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst, | |
146 const int count) | |
147 { | |
148 int i; | |
149 int idx = 0, tidx = 0; | |
150 | |
151 for (i = 0; i + 4 < count; i += 4) { | |
152 | |
153 V4SF fmag, fphase, fre, fim; | |
154 | |
155 for (int j = 0; j < 3; ++j) { | |
156 fmag.f[j] = srcdst[idx++]; | |
157 fphase.f[j] = srcdst[idx++]; | |
158 } | |
159 | |
160 sincos_ps(fphase.v, &fim.v, &fre.v); | |
161 | |
162 for (int j = 0; j < 3; ++j) { | |
163 srcdst[tidx++] = fre.f[j] * fmag.f[j]; | |
164 srcdst[tidx++] = fim.f[j] * fmag.f[j]; | |
165 } | |
166 } | |
167 | |
168 while (i < count) { | |
169 float real, imag; | |
170 float mag = srcdst[idx++]; | |
171 float phase = srcdst[idx++]; | |
172 c_phasor(&real, &imag, phase); | |
173 srcdst[tidx++] = real * mag; | |
174 srcdst[tidx++] = imag * mag; | |
175 ++i; | |
176 } | |
177 } | |
178 | |
179 void | |
180 v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst, | |
181 const float *const BQ_R__ mag, | |
182 const float *const BQ_R__ phase, | |
183 const int count) | |
184 { | |
185 int i; | |
186 int idx = 0, tidx = 0; | |
187 | |
188 for (i = 0; i + 4 <= count; i += 4) { | |
189 | |
190 V4SF fmag, fphase, fre, fim; | |
191 | |
192 for (int j = 0; j < 3; ++j) { | |
193 fmag.f[j] = mag[idx]; | |
194 fphase.f[j] = phase[idx]; | |
195 ++idx; | |
196 } | |
197 | |
198 sincos_ps(fphase.v, &fim.v, &fre.v); | |
199 | |
200 for (int j = 0; j < 3; ++j) { | |
201 dst[tidx++] = fre.f[j] * fmag.f[j]; | |
202 dst[tidx++] = fim.f[j] * fmag.f[j]; | |
203 } | |
204 } | |
205 | |
206 while (i < count) { | |
207 float real, imag; | |
208 c_phasor(&real, &imag, phase[i]); | |
209 dst[tidx++] = real * mag[i]; | |
210 dst[tidx++] = imag * mag[i]; | |
211 ++i; | |
212 } | |
213 } | |
214 | |
215 #endif | |
216 | |
217 #ifndef NO_COMPLEX_TYPES | |
218 | |
219 #if defined HAVE_IPP | |
220 | |
221 void | |
222 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, | |
223 const bq_complex_element_t *const BQ_R__ mag, | |
224 const bq_complex_element_t *const BQ_R__ phase, | |
225 const int count) | |
226 { | |
227 if (sizeof(bq_complex_element_t) == sizeof(float)) { | |
228 ippsPolarToCart_32fc((const float *)mag, (const float *)phase, | |
229 (Ipp32fc *)dst, count); | |
230 } else { | |
231 ippsPolarToCart_64fc((const double *)mag, (const double *)phase, | |
232 (Ipp64fc *)dst, count); | |
233 } | |
234 } | |
235 | |
236 #elif defined HAVE_VDSP | |
237 | |
238 void | |
239 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, | |
240 const bq_complex_element_t *const BQ_R__ mag, | |
241 const bq_complex_element_t *const BQ_R__ phase, | |
242 const int count) | |
243 { | |
244 bq_complex_element_t *sc = (bq_complex_element_t *) | |
245 alloca(count * 2 * sizeof(bq_complex_element_t)); | |
246 | |
247 if (sizeof(bq_complex_element_t) == sizeof(float)) { | |
248 vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count); | |
249 } else { | |
250 vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count); | |
251 } | |
252 | |
253 int sini = 0; | |
254 int cosi = count; | |
255 | |
256 for (int i = 0; i < count; ++i) { | |
257 dst[i].re = mag[i] * sc[cosi++]; | |
258 dst[i].im = mag[i] * sc[sini++]; | |
259 } | |
260 } | |
261 | |
262 #else | |
263 | |
264 void | |
265 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, | |
266 const bq_complex_element_t *const BQ_R__ mag, | |
267 const bq_complex_element_t *const BQ_R__ phase, | |
268 const int count) | |
269 { | |
270 for (int i = 0; i < count; ++i) { | |
271 dst[i] = c_phasor(phase[i]); | |
272 } | |
273 for (int i = 0; i < count; ++i) { | |
274 dst[i].re *= mag[i]; | |
275 dst[i].im *= mag[i]; | |
276 } | |
277 } | |
278 | |
279 #endif | |
280 | |
281 #if defined USE_POMMIER_MATHFUN | |
282 | |
283 //!!! further tests reqd. This is only single precision but it seems | |
284 //!!! to be much faster than normal math library sincos. The comments | |
285 //!!! note that precision suffers for high arguments to sincos though, | |
286 //!!! and that is probably a common case for us | |
287 | |
288 void | |
289 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst, | |
290 const bq_complex_element_t *const BQ_R__ src, | |
291 const int count) | |
292 { | |
293 int idx = 0, tidx = 0; | |
294 | |
295 for (int i = 0; i < count; i += 4) { | |
296 | |
297 V4SF fmag, fphase, fre, fim; | |
298 | |
299 for (int j = 0; j < 3; ++j) { | |
300 fmag.f[j] = src[idx++]; | |
301 fphase.f[j] = src[idx++]; | |
302 } | |
303 | |
304 sincos_ps(fphase.v, &fim.v, &fre.v); | |
305 | |
306 for (int j = 0; j < 3; ++j) { | |
307 dst[tidx].re = fre.f[j] * fmag.f[j]; | |
308 dst[tidx++].im = fim.f[j] * fmag.f[j]; | |
309 } | |
310 } | |
311 } | |
312 | |
313 #elif (defined HAVE_IPP || defined HAVE_VDSP) | |
314 | |
315 // with a vector library, it should be faster to deinterleave and call | |
316 // the basic fn | |
317 | |
318 void | |
319 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst, | |
320 const bq_complex_element_t *const BQ_R__ src, | |
321 const int count) | |
322 { | |
323 bq_complex_element_t *mag = (bq_complex_element_t *) | |
324 alloca(count * sizeof(bq_complex_element_t)); | |
325 bq_complex_element_t *phase = (bq_complex_element_t *) | |
326 alloca(count * sizeof(bq_complex_element_t)); | |
327 bq_complex_element_t *magphase[] = { mag, phase }; | |
328 | |
329 v_deinterleave(magphase, src, 2, count); | |
330 v_polar_to_cartesian(dst, mag, phase, count); | |
331 } | |
332 | |
333 #else | |
334 | |
335 // without a vector library, better avoid the deinterleave step | |
336 | |
337 void | |
338 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst, | |
339 const bq_complex_element_t *const BQ_R__ src, | |
340 const int count) | |
341 { | |
342 bq_complex_element_t mag, phase; | |
343 int idx = 0; | |
344 for (int i = 0; i < count; ++i) { | |
345 mag = src[idx++]; | |
346 phase = src[idx++]; | |
347 dst[i] = c_phasor(phase); | |
348 dst[i].re *= mag; | |
349 dst[i].im *= mag; | |
350 } | |
351 } | |
352 | |
353 #endif | |
354 | |
355 void | |
356 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst, | |
357 const int count) | |
358 { | |
359 // Not ideal | |
360 bq_complex_element_t mag, phase; | |
361 int ii = 0, io = 0; | |
362 for (int i = 0; i < count; ++i) { | |
363 mag = srcdst[ii++]; | |
364 phase = srcdst[ii++]; | |
365 bq_complex_t p = c_phasor(phase); | |
366 srcdst[io++] = mag * p.re; | |
367 srcdst[io++] = mag * p.im; | |
368 } | |
369 } | |
370 | |
371 #endif | |
372 | |
373 } |