Chris@366
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@366
|
2
|
Chris@366
|
3 /*
|
Chris@366
|
4 bqvec
|
Chris@366
|
5
|
Chris@366
|
6 A small library for vector arithmetic and allocation in C++ using
|
Chris@366
|
7 raw C pointer arrays.
|
Chris@366
|
8
|
Chris@372
|
9 Copyright 2007-2016 Particular Programs Ltd.
|
Chris@366
|
10
|
Chris@366
|
11 Permission is hereby granted, free of charge, to any person
|
Chris@366
|
12 obtaining a copy of this software and associated documentation
|
Chris@366
|
13 files (the "Software"), to deal in the Software without
|
Chris@366
|
14 restriction, including without limitation the rights to use, copy,
|
Chris@366
|
15 modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@366
|
16 of the Software, and to permit persons to whom the Software is
|
Chris@366
|
17 furnished to do so, subject to the following conditions:
|
Chris@366
|
18
|
Chris@366
|
19 The above copyright notice and this permission notice shall be
|
Chris@366
|
20 included in all copies or substantial portions of the Software.
|
Chris@366
|
21
|
Chris@366
|
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@366
|
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@366
|
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@366
|
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
|
Chris@366
|
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@366
|
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@366
|
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@366
|
29
|
Chris@366
|
30 Except as contained in this notice, the names of Chris Cannam and
|
Chris@366
|
31 Particular Programs Ltd shall not be used in advertising or
|
Chris@366
|
32 otherwise to promote the sale, use or other dealings in this
|
Chris@366
|
33 Software without prior written authorization.
|
Chris@366
|
34 */
|
Chris@366
|
35
|
Chris@372
|
36 #include "VectorOpsComplex.h"
|
Chris@366
|
37
|
Chris@366
|
38 #include <cassert>
|
Chris@366
|
39
|
Chris@366
|
40 #if defined USE_POMMIER_MATHFUN
|
Chris@366
|
41 #if defined __ARMEL__
|
Chris@366
|
42 #include "pommier/neon_mathfun.h"
|
Chris@366
|
43 #else
|
Chris@366
|
44 #include "pommier/sse_mathfun.h"
|
Chris@366
|
45 #endif
|
Chris@366
|
46 #endif
|
Chris@366
|
47
|
Chris@372
|
48 #if defined(_MSC_VER) || defined(WIN32)
|
Chris@366
|
49 #include <malloc.h>
|
Chris@372
|
50 #ifndef alloca
|
Chris@366
|
51 #define alloca _alloca
|
Chris@366
|
52 #endif
|
Chris@372
|
53 #else
|
Chris@372
|
54 #include <alloca.h>
|
Chris@372
|
55 #endif
|
Chris@372
|
56
|
Chris@372
|
57 #include <iostream>
|
Chris@372
|
58
|
Chris@372
|
59 using namespace std;
|
Chris@366
|
60
|
Chris@366
|
61 namespace breakfastquay {
|
Chris@366
|
62
|
Chris@366
|
63 #ifdef USE_APPROXIMATE_ATAN2
|
Chris@366
|
64 float approximate_atan2f(float real, float imag)
|
Chris@366
|
65 {
|
Chris@366
|
66 static const float pi = M_PI;
|
Chris@366
|
67 static const float pi2 = M_PI / 2;
|
Chris@366
|
68
|
Chris@366
|
69 float atan;
|
Chris@366
|
70
|
Chris@366
|
71 if (real == 0.f) {
|
Chris@366
|
72
|
Chris@366
|
73 if (imag > 0.0f) atan = pi2;
|
Chris@366
|
74 else if (imag == 0.0f) atan = 0.0f;
|
Chris@366
|
75 else atan = -pi2;
|
Chris@366
|
76
|
Chris@366
|
77 } else {
|
Chris@366
|
78
|
Chris@366
|
79 float z = imag/real;
|
Chris@366
|
80
|
Chris@366
|
81 if (fabsf(z) < 1.f) {
|
Chris@366
|
82 atan = z / (1.f + 0.28f * z * z);
|
Chris@366
|
83 if (real < 0.f) {
|
Chris@366
|
84 if (imag < 0.f) atan -= pi;
|
Chris@366
|
85 else atan += pi;
|
Chris@366
|
86 }
|
Chris@366
|
87 } else {
|
Chris@366
|
88 atan = pi2 - z / (z * z + 0.28f);
|
Chris@366
|
89 if (imag < 0.f) atan -= pi;
|
Chris@366
|
90 }
|
Chris@366
|
91 }
|
Chris@372
|
92
|
Chris@372
|
93 return atan;
|
Chris@366
|
94 }
|
Chris@366
|
95 #endif
|
Chris@366
|
96
|
Chris@366
|
97 #if defined USE_POMMIER_MATHFUN
|
Chris@366
|
98
|
Chris@366
|
99 #ifdef __ARMEL__
|
Chris@366
|
100 typedef union {
|
Chris@366
|
101 float f[4];
|
Chris@366
|
102 int i[4];
|
Chris@366
|
103 v4sf v;
|
Chris@366
|
104 } V4SF;
|
Chris@366
|
105 #else
|
Chris@366
|
106 typedef ALIGN16_BEG union {
|
Chris@366
|
107 float f[4];
|
Chris@366
|
108 int i[4];
|
Chris@366
|
109 v4sf v;
|
Chris@366
|
110 } ALIGN16_END V4SF;
|
Chris@366
|
111 #endif
|
Chris@366
|
112
|
Chris@366
|
113 void
|
Chris@366
|
114 v_polar_to_cartesian_pommier(float *const BQ_R__ real,
|
Chris@366
|
115 float *const BQ_R__ imag,
|
Chris@366
|
116 const float *const BQ_R__ mag,
|
Chris@366
|
117 const float *const BQ_R__ phase,
|
Chris@366
|
118 const int count)
|
Chris@366
|
119 {
|
Chris@366
|
120 int idx = 0, tidx = 0;
|
Chris@366
|
121 int i = 0;
|
Chris@366
|
122
|
Chris@366
|
123 for (int i = 0; i + 4 < count; i += 4) {
|
Chris@366
|
124
|
Chris@366
|
125 V4SF fmag, fphase, fre, fim;
|
Chris@366
|
126
|
Chris@366
|
127 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
128 fmag.f[j] = mag[idx];
|
Chris@366
|
129 fphase.f[j] = phase[idx++];
|
Chris@366
|
130 }
|
Chris@366
|
131
|
Chris@366
|
132 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@366
|
133
|
Chris@366
|
134 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
135 real[tidx] = fre.f[j] * fmag.f[j];
|
Chris@366
|
136 imag[tidx++] = fim.f[j] * fmag.f[j];
|
Chris@366
|
137 }
|
Chris@366
|
138 }
|
Chris@366
|
139
|
Chris@366
|
140 while (i < count) {
|
Chris@366
|
141 float re, im;
|
Chris@366
|
142 c_phasor(&re, &im, phase[i]);
|
Chris@366
|
143 real[tidx] = re * mag[i];
|
Chris@366
|
144 imag[tidx++] = im * mag[i];
|
Chris@366
|
145 ++i;
|
Chris@366
|
146 }
|
Chris@366
|
147 }
|
Chris@366
|
148
|
Chris@366
|
149 void
|
Chris@366
|
150 v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst,
|
Chris@366
|
151 const int count)
|
Chris@366
|
152 {
|
Chris@366
|
153 int i;
|
Chris@366
|
154 int idx = 0, tidx = 0;
|
Chris@366
|
155
|
Chris@366
|
156 for (i = 0; i + 4 < count; i += 4) {
|
Chris@366
|
157
|
Chris@366
|
158 V4SF fmag, fphase, fre, fim;
|
Chris@366
|
159
|
Chris@366
|
160 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
161 fmag.f[j] = srcdst[idx++];
|
Chris@366
|
162 fphase.f[j] = srcdst[idx++];
|
Chris@366
|
163 }
|
Chris@366
|
164
|
Chris@366
|
165 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@366
|
166
|
Chris@366
|
167 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
168 srcdst[tidx++] = fre.f[j] * fmag.f[j];
|
Chris@366
|
169 srcdst[tidx++] = fim.f[j] * fmag.f[j];
|
Chris@366
|
170 }
|
Chris@366
|
171 }
|
Chris@366
|
172
|
Chris@366
|
173 while (i < count) {
|
Chris@366
|
174 float real, imag;
|
Chris@366
|
175 float mag = srcdst[idx++];
|
Chris@366
|
176 float phase = srcdst[idx++];
|
Chris@366
|
177 c_phasor(&real, &imag, phase);
|
Chris@366
|
178 srcdst[tidx++] = real * mag;
|
Chris@366
|
179 srcdst[tidx++] = imag * mag;
|
Chris@366
|
180 ++i;
|
Chris@366
|
181 }
|
Chris@366
|
182 }
|
Chris@366
|
183
|
Chris@366
|
184 void
|
Chris@366
|
185 v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst,
|
Chris@366
|
186 const float *const BQ_R__ mag,
|
Chris@366
|
187 const float *const BQ_R__ phase,
|
Chris@366
|
188 const int count)
|
Chris@366
|
189 {
|
Chris@366
|
190 int i;
|
Chris@366
|
191 int idx = 0, tidx = 0;
|
Chris@366
|
192
|
Chris@366
|
193 for (i = 0; i + 4 <= count; i += 4) {
|
Chris@366
|
194
|
Chris@366
|
195 V4SF fmag, fphase, fre, fim;
|
Chris@366
|
196
|
Chris@366
|
197 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
198 fmag.f[j] = mag[idx];
|
Chris@366
|
199 fphase.f[j] = phase[idx];
|
Chris@366
|
200 ++idx;
|
Chris@366
|
201 }
|
Chris@366
|
202
|
Chris@366
|
203 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@366
|
204
|
Chris@366
|
205 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
206 dst[tidx++] = fre.f[j] * fmag.f[j];
|
Chris@366
|
207 dst[tidx++] = fim.f[j] * fmag.f[j];
|
Chris@366
|
208 }
|
Chris@366
|
209 }
|
Chris@366
|
210
|
Chris@366
|
211 while (i < count) {
|
Chris@366
|
212 float real, imag;
|
Chris@366
|
213 c_phasor(&real, &imag, phase[i]);
|
Chris@366
|
214 dst[tidx++] = real * mag[i];
|
Chris@366
|
215 dst[tidx++] = imag * mag[i];
|
Chris@366
|
216 ++i;
|
Chris@366
|
217 }
|
Chris@366
|
218 }
|
Chris@366
|
219
|
Chris@366
|
220 #endif
|
Chris@366
|
221
|
Chris@366
|
222 #ifndef NO_COMPLEX_TYPES
|
Chris@366
|
223
|
Chris@366
|
224 void
|
Chris@366
|
225 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@366
|
226 const bq_complex_element_t *const BQ_R__ mag,
|
Chris@366
|
227 const bq_complex_element_t *const BQ_R__ phase,
|
Chris@366
|
228 const int count)
|
Chris@366
|
229 {
|
Chris@366
|
230 if (sizeof(bq_complex_element_t) == sizeof(float)) {
|
Chris@372
|
231 v_polar_to_cartesian_interleaved((float *)dst,
|
Chris@372
|
232 (const float *)mag,
|
Chris@372
|
233 (const float *)phase,
|
Chris@372
|
234 count);
|
Chris@366
|
235 } else {
|
Chris@372
|
236 v_polar_to_cartesian_interleaved((double *)dst,
|
Chris@372
|
237 (const double *)mag,
|
Chris@372
|
238 (const double *)phase,
|
Chris@372
|
239 count);
|
Chris@366
|
240 }
|
Chris@366
|
241 }
|
Chris@366
|
242
|
Chris@372
|
243 void
|
Chris@372
|
244 v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag,
|
Chris@372
|
245 bq_complex_element_t *const BQ_R__ phase,
|
Chris@372
|
246 const bq_complex_t *const BQ_R__ src,
|
Chris@372
|
247 const int count)
|
Chris@372
|
248 {
|
Chris@372
|
249 if (sizeof(bq_complex_element_t) == sizeof(float)) {
|
Chris@372
|
250 v_cartesian_interleaved_to_polar((float *)mag,
|
Chris@372
|
251 (float *)phase,
|
Chris@372
|
252 (const float *)src,
|
Chris@372
|
253 count);
|
Chris@372
|
254 } else {
|
Chris@372
|
255 v_cartesian_interleaved_to_polar((double *)mag,
|
Chris@372
|
256 (double *)phase,
|
Chris@372
|
257 (const double *)src,
|
Chris@372
|
258 count);
|
Chris@372
|
259 }
|
Chris@372
|
260 }
|
Chris@366
|
261
|
Chris@366
|
262 void
|
Chris@372
|
263 v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag,
|
Chris@372
|
264 const bq_complex_t *const BQ_R__ src,
|
Chris@372
|
265 const int count)
|
Chris@366
|
266 {
|
Chris@366
|
267 if (sizeof(bq_complex_element_t) == sizeof(float)) {
|
Chris@372
|
268 v_cartesian_interleaved_to_magnitudes((float *)mag,
|
Chris@372
|
269 (const float *)src,
|
Chris@372
|
270 count);
|
Chris@366
|
271 } else {
|
Chris@372
|
272 v_cartesian_interleaved_to_magnitudes((double *)mag,
|
Chris@372
|
273 (const double *)src,
|
Chris@372
|
274 count);
|
Chris@366
|
275 }
|
Chris@372
|
276 }
|
Chris@366
|
277
|
Chris@366
|
278 #if defined USE_POMMIER_MATHFUN
|
Chris@366
|
279
|
Chris@366
|
280 //!!! further tests reqd. This is only single precision but it seems
|
Chris@366
|
281 //!!! to be much faster than normal math library sincos. The comments
|
Chris@366
|
282 //!!! note that precision suffers for high arguments to sincos though,
|
Chris@366
|
283 //!!! and that is probably a common case for us
|
Chris@366
|
284
|
Chris@366
|
285 void
|
Chris@366
|
286 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@366
|
287 const bq_complex_element_t *const BQ_R__ src,
|
Chris@366
|
288 const int count)
|
Chris@366
|
289 {
|
Chris@366
|
290 int idx = 0, tidx = 0;
|
Chris@366
|
291
|
Chris@366
|
292 for (int i = 0; i < count; i += 4) {
|
Chris@366
|
293
|
Chris@366
|
294 V4SF fmag, fphase, fre, fim;
|
Chris@366
|
295
|
Chris@366
|
296 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
297 fmag.f[j] = src[idx++];
|
Chris@366
|
298 fphase.f[j] = src[idx++];
|
Chris@366
|
299 }
|
Chris@366
|
300
|
Chris@366
|
301 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@366
|
302
|
Chris@366
|
303 for (int j = 0; j < 3; ++j) {
|
Chris@366
|
304 dst[tidx].re = fre.f[j] * fmag.f[j];
|
Chris@366
|
305 dst[tidx++].im = fim.f[j] * fmag.f[j];
|
Chris@366
|
306 }
|
Chris@366
|
307 }
|
Chris@366
|
308 }
|
Chris@366
|
309
|
Chris@366
|
310 #elif (defined HAVE_IPP || defined HAVE_VDSP)
|
Chris@366
|
311
|
Chris@366
|
312 // with a vector library, it should be faster to deinterleave and call
|
Chris@366
|
313 // the basic fn
|
Chris@366
|
314
|
Chris@366
|
315 void
|
Chris@366
|
316 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@366
|
317 const bq_complex_element_t *const BQ_R__ src,
|
Chris@366
|
318 const int count)
|
Chris@366
|
319 {
|
Chris@366
|
320 bq_complex_element_t *mag = (bq_complex_element_t *)
|
Chris@366
|
321 alloca(count * sizeof(bq_complex_element_t));
|
Chris@366
|
322 bq_complex_element_t *phase = (bq_complex_element_t *)
|
Chris@366
|
323 alloca(count * sizeof(bq_complex_element_t));
|
Chris@366
|
324 bq_complex_element_t *magphase[] = { mag, phase };
|
Chris@366
|
325
|
Chris@366
|
326 v_deinterleave(magphase, src, 2, count);
|
Chris@366
|
327 v_polar_to_cartesian(dst, mag, phase, count);
|
Chris@366
|
328 }
|
Chris@366
|
329
|
Chris@366
|
330 #else
|
Chris@366
|
331
|
Chris@366
|
332 // without a vector library, better avoid the deinterleave step
|
Chris@366
|
333
|
Chris@366
|
334 void
|
Chris@366
|
335 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@366
|
336 const bq_complex_element_t *const BQ_R__ src,
|
Chris@366
|
337 const int count)
|
Chris@366
|
338 {
|
Chris@366
|
339 bq_complex_element_t mag, phase;
|
Chris@366
|
340 int idx = 0;
|
Chris@366
|
341 for (int i = 0; i < count; ++i) {
|
Chris@366
|
342 mag = src[idx++];
|
Chris@366
|
343 phase = src[idx++];
|
Chris@366
|
344 dst[i] = c_phasor(phase);
|
Chris@366
|
345 dst[i].re *= mag;
|
Chris@366
|
346 dst[i].im *= mag;
|
Chris@366
|
347 }
|
Chris@366
|
348 }
|
Chris@366
|
349
|
Chris@366
|
350 #endif
|
Chris@366
|
351
|
Chris@366
|
352 void
|
Chris@366
|
353 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
|
Chris@366
|
354 const int count)
|
Chris@366
|
355 {
|
Chris@366
|
356 bq_complex_element_t mag, phase;
|
Chris@366
|
357 int ii = 0, io = 0;
|
Chris@366
|
358 for (int i = 0; i < count; ++i) {
|
Chris@366
|
359 mag = srcdst[ii++];
|
Chris@366
|
360 phase = srcdst[ii++];
|
Chris@366
|
361 bq_complex_t p = c_phasor(phase);
|
Chris@366
|
362 srcdst[io++] = mag * p.re;
|
Chris@366
|
363 srcdst[io++] = mag * p.im;
|
Chris@366
|
364 }
|
Chris@366
|
365 }
|
Chris@366
|
366
|
Chris@372
|
367 #endif // NO_COMPLEX_TYPES
|
Chris@366
|
368
|
Chris@366
|
369 }
|