Chris@29
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@29
|
2
|
Chris@29
|
3 /*
|
Chris@29
|
4 bqvec
|
Chris@29
|
5
|
Chris@29
|
6 A small library for vector arithmetic and allocation in C++ using
|
Chris@29
|
7 raw C pointer arrays.
|
Chris@29
|
8
|
Chris@29
|
9 Copyright 2007-2015 Particular Programs Ltd.
|
Chris@29
|
10
|
Chris@29
|
11 Permission is hereby granted, free of charge, to any person
|
Chris@29
|
12 obtaining a copy of this software and associated documentation
|
Chris@29
|
13 files (the "Software"), to deal in the Software without
|
Chris@29
|
14 restriction, including without limitation the rights to use, copy,
|
Chris@29
|
15 modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@29
|
16 of the Software, and to permit persons to whom the Software is
|
Chris@29
|
17 furnished to do so, subject to the following conditions:
|
Chris@29
|
18
|
Chris@29
|
19 The above copyright notice and this permission notice shall be
|
Chris@29
|
20 included in all copies or substantial portions of the Software.
|
Chris@29
|
21
|
Chris@29
|
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@29
|
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@29
|
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@29
|
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
|
Chris@29
|
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@29
|
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@29
|
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@29
|
29
|
Chris@29
|
30 Except as contained in this notice, the names of Chris Cannam and
|
Chris@29
|
31 Particular Programs Ltd shall not be used in advertising or
|
Chris@29
|
32 otherwise to promote the sale, use or other dealings in this
|
Chris@29
|
33 Software without prior written authorization.
|
Chris@29
|
34 */
|
Chris@29
|
35
|
Chris@29
|
36 #include "VectorOpsComplex.h"
|
Chris@29
|
37
|
Chris@29
|
38 #include <cassert>
|
Chris@29
|
39
|
Chris@29
|
40 #ifdef __MSVC__
|
Chris@29
|
41 #include <malloc.h>
|
Chris@29
|
42 #define alloca _alloca
|
Chris@29
|
43 #endif
|
Chris@29
|
44
|
Chris@29
|
45 #if defined USE_POMMIER_MATHFUN
|
Chris@29
|
46 #if defined __ARMEL__
|
Chris@29
|
47 #include "pommier/neon_mathfun.h"
|
Chris@29
|
48 #else
|
Chris@29
|
49 #include "pommier/sse_mathfun.h"
|
Chris@29
|
50 #endif
|
Chris@29
|
51 #endif
|
Chris@29
|
52
|
Chris@29
|
53 #ifdef __MSVC__
|
Chris@29
|
54 #include <malloc.h>
|
Chris@29
|
55 #define alloca _alloca
|
Chris@29
|
56 #endif
|
Chris@29
|
57
|
Chris@29
|
58 namespace breakfastquay {
|
Chris@29
|
59
|
Chris@29
|
60 #ifdef USE_APPROXIMATE_ATAN2
|
Chris@29
|
61 float approximate_atan2f(float real, float imag)
|
Chris@29
|
62 {
|
Chris@29
|
63 static const float pi = M_PI;
|
Chris@29
|
64 static const float pi2 = M_PI / 2;
|
Chris@29
|
65
|
Chris@29
|
66 float atan;
|
Chris@29
|
67
|
Chris@29
|
68 if (real == 0.f) {
|
Chris@29
|
69
|
Chris@29
|
70 if (imag > 0.0f) atan = pi2;
|
Chris@29
|
71 else if (imag == 0.0f) atan = 0.0f;
|
Chris@29
|
72 else atan = -pi2;
|
Chris@29
|
73
|
Chris@29
|
74 } else {
|
Chris@29
|
75
|
Chris@29
|
76 float z = imag/real;
|
Chris@29
|
77
|
Chris@29
|
78 if (fabsf(z) < 1.f) {
|
Chris@29
|
79 atan = z / (1.f + 0.28f * z * z);
|
Chris@29
|
80 if (real < 0.f) {
|
Chris@29
|
81 if (imag < 0.f) atan -= pi;
|
Chris@29
|
82 else atan += pi;
|
Chris@29
|
83 }
|
Chris@29
|
84 } else {
|
Chris@29
|
85 atan = pi2 - z / (z * z + 0.28f);
|
Chris@29
|
86 if (imag < 0.f) atan -= pi;
|
Chris@29
|
87 }
|
Chris@29
|
88 }
|
Chris@29
|
89 }
|
Chris@29
|
90 #endif
|
Chris@29
|
91
|
Chris@29
|
92 #if defined USE_POMMIER_MATHFUN
|
Chris@29
|
93
|
Chris@29
|
94 #ifdef __ARMEL__
|
Chris@29
|
95 typedef union {
|
Chris@29
|
96 float f[4];
|
Chris@29
|
97 int i[4];
|
Chris@29
|
98 v4sf v;
|
Chris@29
|
99 } V4SF;
|
Chris@29
|
100 #else
|
Chris@29
|
101 typedef ALIGN16_BEG union {
|
Chris@29
|
102 float f[4];
|
Chris@29
|
103 int i[4];
|
Chris@29
|
104 v4sf v;
|
Chris@29
|
105 } ALIGN16_END V4SF;
|
Chris@29
|
106 #endif
|
Chris@29
|
107
|
Chris@29
|
108 void
|
Chris@29
|
109 v_polar_to_cartesian_pommier(float *const BQ_R__ real,
|
Chris@29
|
110 float *const BQ_R__ imag,
|
Chris@29
|
111 const float *const BQ_R__ mag,
|
Chris@29
|
112 const float *const BQ_R__ phase,
|
Chris@29
|
113 const int count)
|
Chris@29
|
114 {
|
Chris@29
|
115 int idx = 0, tidx = 0;
|
Chris@29
|
116 int i = 0;
|
Chris@29
|
117
|
Chris@29
|
118 for (int i = 0; i + 4 < count; i += 4) {
|
Chris@29
|
119
|
Chris@29
|
120 V4SF fmag, fphase, fre, fim;
|
Chris@29
|
121
|
Chris@29
|
122 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
123 fmag.f[j] = mag[idx];
|
Chris@29
|
124 fphase.f[j] = phase[idx++];
|
Chris@29
|
125 }
|
Chris@29
|
126
|
Chris@29
|
127 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@29
|
128
|
Chris@29
|
129 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
130 real[tidx] = fre.f[j] * fmag.f[j];
|
Chris@29
|
131 imag[tidx++] = fim.f[j] * fmag.f[j];
|
Chris@29
|
132 }
|
Chris@29
|
133 }
|
Chris@29
|
134
|
Chris@29
|
135 while (i < count) {
|
Chris@29
|
136 float re, im;
|
Chris@29
|
137 c_phasor(&re, &im, phase[i]);
|
Chris@29
|
138 real[tidx] = re * mag[i];
|
Chris@29
|
139 imag[tidx++] = im * mag[i];
|
Chris@29
|
140 ++i;
|
Chris@29
|
141 }
|
Chris@29
|
142 }
|
Chris@29
|
143
|
Chris@29
|
144 void
|
Chris@29
|
145 v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst,
|
Chris@29
|
146 const int count)
|
Chris@29
|
147 {
|
Chris@29
|
148 int i;
|
Chris@29
|
149 int idx = 0, tidx = 0;
|
Chris@29
|
150
|
Chris@29
|
151 for (i = 0; i + 4 < count; i += 4) {
|
Chris@29
|
152
|
Chris@29
|
153 V4SF fmag, fphase, fre, fim;
|
Chris@29
|
154
|
Chris@29
|
155 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
156 fmag.f[j] = srcdst[idx++];
|
Chris@29
|
157 fphase.f[j] = srcdst[idx++];
|
Chris@29
|
158 }
|
Chris@29
|
159
|
Chris@29
|
160 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@29
|
161
|
Chris@29
|
162 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
163 srcdst[tidx++] = fre.f[j] * fmag.f[j];
|
Chris@29
|
164 srcdst[tidx++] = fim.f[j] * fmag.f[j];
|
Chris@29
|
165 }
|
Chris@29
|
166 }
|
Chris@29
|
167
|
Chris@29
|
168 while (i < count) {
|
Chris@29
|
169 float real, imag;
|
Chris@29
|
170 float mag = srcdst[idx++];
|
Chris@29
|
171 float phase = srcdst[idx++];
|
Chris@29
|
172 c_phasor(&real, &imag, phase);
|
Chris@29
|
173 srcdst[tidx++] = real * mag;
|
Chris@29
|
174 srcdst[tidx++] = imag * mag;
|
Chris@29
|
175 ++i;
|
Chris@29
|
176 }
|
Chris@29
|
177 }
|
Chris@29
|
178
|
Chris@29
|
179 void
|
Chris@29
|
180 v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst,
|
Chris@29
|
181 const float *const BQ_R__ mag,
|
Chris@29
|
182 const float *const BQ_R__ phase,
|
Chris@29
|
183 const int count)
|
Chris@29
|
184 {
|
Chris@29
|
185 int i;
|
Chris@29
|
186 int idx = 0, tidx = 0;
|
Chris@29
|
187
|
Chris@29
|
188 for (i = 0; i + 4 <= count; i += 4) {
|
Chris@29
|
189
|
Chris@29
|
190 V4SF fmag, fphase, fre, fim;
|
Chris@29
|
191
|
Chris@29
|
192 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
193 fmag.f[j] = mag[idx];
|
Chris@29
|
194 fphase.f[j] = phase[idx];
|
Chris@29
|
195 ++idx;
|
Chris@29
|
196 }
|
Chris@29
|
197
|
Chris@29
|
198 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@29
|
199
|
Chris@29
|
200 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
201 dst[tidx++] = fre.f[j] * fmag.f[j];
|
Chris@29
|
202 dst[tidx++] = fim.f[j] * fmag.f[j];
|
Chris@29
|
203 }
|
Chris@29
|
204 }
|
Chris@29
|
205
|
Chris@29
|
206 while (i < count) {
|
Chris@29
|
207 float real, imag;
|
Chris@29
|
208 c_phasor(&real, &imag, phase[i]);
|
Chris@29
|
209 dst[tidx++] = real * mag[i];
|
Chris@29
|
210 dst[tidx++] = imag * mag[i];
|
Chris@29
|
211 ++i;
|
Chris@29
|
212 }
|
Chris@29
|
213 }
|
Chris@29
|
214
|
Chris@29
|
215 #endif
|
Chris@29
|
216
|
Chris@29
|
217 #ifndef NO_COMPLEX_TYPES
|
Chris@29
|
218
|
Chris@29
|
219 #if defined HAVE_IPP
|
Chris@29
|
220
|
Chris@29
|
221 void
|
Chris@29
|
222 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@29
|
223 const bq_complex_element_t *const BQ_R__ mag,
|
Chris@29
|
224 const bq_complex_element_t *const BQ_R__ phase,
|
Chris@29
|
225 const int count)
|
Chris@29
|
226 {
|
Chris@29
|
227 if (sizeof(bq_complex_element_t) == sizeof(float)) {
|
Chris@29
|
228 ippsPolarToCart_32fc((const float *)mag, (const float *)phase,
|
Chris@29
|
229 (Ipp32fc *)dst, count);
|
Chris@29
|
230 } else {
|
Chris@29
|
231 ippsPolarToCart_64fc((const double *)mag, (const double *)phase,
|
Chris@29
|
232 (Ipp64fc *)dst, count);
|
Chris@29
|
233 }
|
Chris@29
|
234 }
|
Chris@29
|
235
|
Chris@29
|
236 #elif defined HAVE_VDSP
|
Chris@29
|
237
|
Chris@29
|
238 void
|
Chris@29
|
239 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@29
|
240 const bq_complex_element_t *const BQ_R__ mag,
|
Chris@29
|
241 const bq_complex_element_t *const BQ_R__ phase,
|
Chris@29
|
242 const int count)
|
Chris@29
|
243 {
|
Chris@29
|
244 bq_complex_element_t *sc = (bq_complex_element_t *)
|
Chris@29
|
245 alloca(count * 2 * sizeof(bq_complex_element_t));
|
Chris@29
|
246
|
Chris@29
|
247 if (sizeof(bq_complex_element_t) == sizeof(float)) {
|
Chris@29
|
248 vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count);
|
Chris@29
|
249 } else {
|
Chris@29
|
250 vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count);
|
Chris@29
|
251 }
|
Chris@29
|
252
|
Chris@29
|
253 int sini = 0;
|
Chris@29
|
254 int cosi = count;
|
Chris@29
|
255
|
Chris@29
|
256 for (int i = 0; i < count; ++i) {
|
Chris@29
|
257 dst[i].re = mag[i] * sc[cosi++];
|
Chris@29
|
258 dst[i].im = mag[i] * sc[sini++];
|
Chris@29
|
259 }
|
Chris@29
|
260 }
|
Chris@29
|
261
|
Chris@29
|
262 #else
|
Chris@29
|
263
|
Chris@29
|
264 void
|
Chris@29
|
265 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@29
|
266 const bq_complex_element_t *const BQ_R__ mag,
|
Chris@29
|
267 const bq_complex_element_t *const BQ_R__ phase,
|
Chris@29
|
268 const int count)
|
Chris@29
|
269 {
|
Chris@29
|
270 for (int i = 0; i < count; ++i) {
|
Chris@29
|
271 dst[i] = c_phasor(phase[i]);
|
Chris@29
|
272 }
|
Chris@29
|
273 for (int i = 0; i < count; ++i) {
|
Chris@29
|
274 dst[i].re *= mag[i];
|
Chris@29
|
275 dst[i].im *= mag[i];
|
Chris@29
|
276 }
|
Chris@29
|
277 }
|
Chris@29
|
278
|
Chris@29
|
279 #endif
|
Chris@29
|
280
|
Chris@29
|
281 #if defined USE_POMMIER_MATHFUN
|
Chris@29
|
282
|
Chris@29
|
283 //!!! further tests reqd. This is only single precision but it seems
|
Chris@29
|
284 //!!! to be much faster than normal math library sincos. The comments
|
Chris@29
|
285 //!!! note that precision suffers for high arguments to sincos though,
|
Chris@29
|
286 //!!! and that is probably a common case for us
|
Chris@29
|
287
|
Chris@29
|
288 void
|
Chris@29
|
289 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@29
|
290 const bq_complex_element_t *const BQ_R__ src,
|
Chris@29
|
291 const int count)
|
Chris@29
|
292 {
|
Chris@29
|
293 int idx = 0, tidx = 0;
|
Chris@29
|
294
|
Chris@29
|
295 for (int i = 0; i < count; i += 4) {
|
Chris@29
|
296
|
Chris@29
|
297 V4SF fmag, fphase, fre, fim;
|
Chris@29
|
298
|
Chris@29
|
299 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
300 fmag.f[j] = src[idx++];
|
Chris@29
|
301 fphase.f[j] = src[idx++];
|
Chris@29
|
302 }
|
Chris@29
|
303
|
Chris@29
|
304 sincos_ps(fphase.v, &fim.v, &fre.v);
|
Chris@29
|
305
|
Chris@29
|
306 for (int j = 0; j < 3; ++j) {
|
Chris@29
|
307 dst[tidx].re = fre.f[j] * fmag.f[j];
|
Chris@29
|
308 dst[tidx++].im = fim.f[j] * fmag.f[j];
|
Chris@29
|
309 }
|
Chris@29
|
310 }
|
Chris@29
|
311 }
|
Chris@29
|
312
|
Chris@29
|
313 #elif (defined HAVE_IPP || defined HAVE_VDSP)
|
Chris@29
|
314
|
Chris@29
|
315 // with a vector library, it should be faster to deinterleave and call
|
Chris@29
|
316 // the basic fn
|
Chris@29
|
317
|
Chris@29
|
318 void
|
Chris@29
|
319 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@29
|
320 const bq_complex_element_t *const BQ_R__ src,
|
Chris@29
|
321 const int count)
|
Chris@29
|
322 {
|
Chris@29
|
323 bq_complex_element_t *mag = (bq_complex_element_t *)
|
Chris@29
|
324 alloca(count * sizeof(bq_complex_element_t));
|
Chris@29
|
325 bq_complex_element_t *phase = (bq_complex_element_t *)
|
Chris@29
|
326 alloca(count * sizeof(bq_complex_element_t));
|
Chris@29
|
327 bq_complex_element_t *magphase[] = { mag, phase };
|
Chris@29
|
328
|
Chris@29
|
329 v_deinterleave(magphase, src, 2, count);
|
Chris@29
|
330 v_polar_to_cartesian(dst, mag, phase, count);
|
Chris@29
|
331 }
|
Chris@29
|
332
|
Chris@29
|
333 #else
|
Chris@29
|
334
|
Chris@29
|
335 // without a vector library, better avoid the deinterleave step
|
Chris@29
|
336
|
Chris@29
|
337 void
|
Chris@29
|
338 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
|
Chris@29
|
339 const bq_complex_element_t *const BQ_R__ src,
|
Chris@29
|
340 const int count)
|
Chris@29
|
341 {
|
Chris@29
|
342 bq_complex_element_t mag, phase;
|
Chris@29
|
343 int idx = 0;
|
Chris@29
|
344 for (int i = 0; i < count; ++i) {
|
Chris@29
|
345 mag = src[idx++];
|
Chris@29
|
346 phase = src[idx++];
|
Chris@29
|
347 dst[i] = c_phasor(phase);
|
Chris@29
|
348 dst[i].re *= mag;
|
Chris@29
|
349 dst[i].im *= mag;
|
Chris@29
|
350 }
|
Chris@29
|
351 }
|
Chris@29
|
352
|
Chris@29
|
353 #endif
|
Chris@29
|
354
|
Chris@29
|
355 void
|
Chris@29
|
356 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
|
Chris@29
|
357 const int count)
|
Chris@29
|
358 {
|
Chris@29
|
359 // Not ideal
|
Chris@29
|
360 bq_complex_element_t mag, phase;
|
Chris@29
|
361 int ii = 0, io = 0;
|
Chris@29
|
362 for (int i = 0; i < count; ++i) {
|
Chris@29
|
363 mag = srcdst[ii++];
|
Chris@29
|
364 phase = srcdst[ii++];
|
Chris@29
|
365 bq_complex_t p = c_phasor(phase);
|
Chris@29
|
366 srcdst[io++] = mag * p.re;
|
Chris@29
|
367 srcdst[io++] = mag * p.im;
|
Chris@29
|
368 }
|
Chris@29
|
369 }
|
Chris@29
|
370
|
Chris@29
|
371 #endif
|
Chris@29
|
372
|
Chris@29
|
373 }
|