comparison bqvec/src/VectorOpsComplex.cpp @ 366:5d0a2ebb4d17

Bring dependent libraries in to repo
author Chris Cannam
date Fri, 24 Jun 2016 14:47:45 +0100
parents
children af71cbdab621
comparison
equal deleted inserted replaced
365:112766f4c34b 366:5d0a2ebb4d17
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 bqvec
5
6 A small library for vector arithmetic and allocation in C++ using
7 raw C pointer arrays.
8
9 Copyright 2007-2015 Particular Programs Ltd.
10
11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use, copy,
15 modify, merge, publish, distribute, sublicense, and/or sell copies
16 of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be
20 included in all copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29
30 Except as contained in this notice, the names of Chris Cannam and
31 Particular Programs Ltd shall not be used in advertising or
32 otherwise to promote the sale, use or other dealings in this
33 Software without prior written authorization.
34 */
35
36 #include "bqvec/VectorOpsComplex.h"
37
38 #include <cassert>
39
40 #ifdef __MSVC__
41 #include <malloc.h>
42 #define alloca _alloca
43 #endif
44
45 #if defined USE_POMMIER_MATHFUN
46 #if defined __ARMEL__
47 #include "pommier/neon_mathfun.h"
48 #else
49 #include "pommier/sse_mathfun.h"
50 #endif
51 #endif
52
53 #ifdef __MSVC__
54 #include <malloc.h>
55 #define alloca _alloca
56 #endif
57
58 namespace breakfastquay {
59
60 #ifdef USE_APPROXIMATE_ATAN2
61 float approximate_atan2f(float real, float imag)
62 {
63 static const float pi = M_PI;
64 static const float pi2 = M_PI / 2;
65
66 float atan;
67
68 if (real == 0.f) {
69
70 if (imag > 0.0f) atan = pi2;
71 else if (imag == 0.0f) atan = 0.0f;
72 else atan = -pi2;
73
74 } else {
75
76 float z = imag/real;
77
78 if (fabsf(z) < 1.f) {
79 atan = z / (1.f + 0.28f * z * z);
80 if (real < 0.f) {
81 if (imag < 0.f) atan -= pi;
82 else atan += pi;
83 }
84 } else {
85 atan = pi2 - z / (z * z + 0.28f);
86 if (imag < 0.f) atan -= pi;
87 }
88 }
89 }
90 #endif
91
92 #if defined USE_POMMIER_MATHFUN
93
94 #ifdef __ARMEL__
95 typedef union {
96 float f[4];
97 int i[4];
98 v4sf v;
99 } V4SF;
100 #else
101 typedef ALIGN16_BEG union {
102 float f[4];
103 int i[4];
104 v4sf v;
105 } ALIGN16_END V4SF;
106 #endif
107
108 void
109 v_polar_to_cartesian_pommier(float *const BQ_R__ real,
110 float *const BQ_R__ imag,
111 const float *const BQ_R__ mag,
112 const float *const BQ_R__ phase,
113 const int count)
114 {
115 int idx = 0, tidx = 0;
116 int i = 0;
117
118 for (int i = 0; i + 4 < count; i += 4) {
119
120 V4SF fmag, fphase, fre, fim;
121
122 for (int j = 0; j < 3; ++j) {
123 fmag.f[j] = mag[idx];
124 fphase.f[j] = phase[idx++];
125 }
126
127 sincos_ps(fphase.v, &fim.v, &fre.v);
128
129 for (int j = 0; j < 3; ++j) {
130 real[tidx] = fre.f[j] * fmag.f[j];
131 imag[tidx++] = fim.f[j] * fmag.f[j];
132 }
133 }
134
135 while (i < count) {
136 float re, im;
137 c_phasor(&re, &im, phase[i]);
138 real[tidx] = re * mag[i];
139 imag[tidx++] = im * mag[i];
140 ++i;
141 }
142 }
143
144 void
145 v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst,
146 const int count)
147 {
148 int i;
149 int idx = 0, tidx = 0;
150
151 for (i = 0; i + 4 < count; i += 4) {
152
153 V4SF fmag, fphase, fre, fim;
154
155 for (int j = 0; j < 3; ++j) {
156 fmag.f[j] = srcdst[idx++];
157 fphase.f[j] = srcdst[idx++];
158 }
159
160 sincos_ps(fphase.v, &fim.v, &fre.v);
161
162 for (int j = 0; j < 3; ++j) {
163 srcdst[tidx++] = fre.f[j] * fmag.f[j];
164 srcdst[tidx++] = fim.f[j] * fmag.f[j];
165 }
166 }
167
168 while (i < count) {
169 float real, imag;
170 float mag = srcdst[idx++];
171 float phase = srcdst[idx++];
172 c_phasor(&real, &imag, phase);
173 srcdst[tidx++] = real * mag;
174 srcdst[tidx++] = imag * mag;
175 ++i;
176 }
177 }
178
179 void
180 v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst,
181 const float *const BQ_R__ mag,
182 const float *const BQ_R__ phase,
183 const int count)
184 {
185 int i;
186 int idx = 0, tidx = 0;
187
188 for (i = 0; i + 4 <= count; i += 4) {
189
190 V4SF fmag, fphase, fre, fim;
191
192 for (int j = 0; j < 3; ++j) {
193 fmag.f[j] = mag[idx];
194 fphase.f[j] = phase[idx];
195 ++idx;
196 }
197
198 sincos_ps(fphase.v, &fim.v, &fre.v);
199
200 for (int j = 0; j < 3; ++j) {
201 dst[tidx++] = fre.f[j] * fmag.f[j];
202 dst[tidx++] = fim.f[j] * fmag.f[j];
203 }
204 }
205
206 while (i < count) {
207 float real, imag;
208 c_phasor(&real, &imag, phase[i]);
209 dst[tidx++] = real * mag[i];
210 dst[tidx++] = imag * mag[i];
211 ++i;
212 }
213 }
214
215 #endif
216
217 #ifndef NO_COMPLEX_TYPES
218
219 #if defined HAVE_IPP
220
221 void
222 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
223 const bq_complex_element_t *const BQ_R__ mag,
224 const bq_complex_element_t *const BQ_R__ phase,
225 const int count)
226 {
227 if (sizeof(bq_complex_element_t) == sizeof(float)) {
228 ippsPolarToCart_32fc((const float *)mag, (const float *)phase,
229 (Ipp32fc *)dst, count);
230 } else {
231 ippsPolarToCart_64fc((const double *)mag, (const double *)phase,
232 (Ipp64fc *)dst, count);
233 }
234 }
235
236 #elif defined HAVE_VDSP
237
238 void
239 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
240 const bq_complex_element_t *const BQ_R__ mag,
241 const bq_complex_element_t *const BQ_R__ phase,
242 const int count)
243 {
244 bq_complex_element_t *sc = (bq_complex_element_t *)
245 alloca(count * 2 * sizeof(bq_complex_element_t));
246
247 if (sizeof(bq_complex_element_t) == sizeof(float)) {
248 vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count);
249 } else {
250 vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count);
251 }
252
253 int sini = 0;
254 int cosi = count;
255
256 for (int i = 0; i < count; ++i) {
257 dst[i].re = mag[i] * sc[cosi++];
258 dst[i].im = mag[i] * sc[sini++];
259 }
260 }
261
262 #else
263
264 void
265 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
266 const bq_complex_element_t *const BQ_R__ mag,
267 const bq_complex_element_t *const BQ_R__ phase,
268 const int count)
269 {
270 for (int i = 0; i < count; ++i) {
271 dst[i] = c_phasor(phase[i]);
272 }
273 for (int i = 0; i < count; ++i) {
274 dst[i].re *= mag[i];
275 dst[i].im *= mag[i];
276 }
277 }
278
279 #endif
280
281 #if defined USE_POMMIER_MATHFUN
282
283 //!!! further tests reqd. This is only single precision but it seems
284 //!!! to be much faster than normal math library sincos. The comments
285 //!!! note that precision suffers for high arguments to sincos though,
286 //!!! and that is probably a common case for us
287
288 void
289 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
290 const bq_complex_element_t *const BQ_R__ src,
291 const int count)
292 {
293 int idx = 0, tidx = 0;
294
295 for (int i = 0; i < count; i += 4) {
296
297 V4SF fmag, fphase, fre, fim;
298
299 for (int j = 0; j < 3; ++j) {
300 fmag.f[j] = src[idx++];
301 fphase.f[j] = src[idx++];
302 }
303
304 sincos_ps(fphase.v, &fim.v, &fre.v);
305
306 for (int j = 0; j < 3; ++j) {
307 dst[tidx].re = fre.f[j] * fmag.f[j];
308 dst[tidx++].im = fim.f[j] * fmag.f[j];
309 }
310 }
311 }
312
313 #elif (defined HAVE_IPP || defined HAVE_VDSP)
314
315 // with a vector library, it should be faster to deinterleave and call
316 // the basic fn
317
318 void
319 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
320 const bq_complex_element_t *const BQ_R__ src,
321 const int count)
322 {
323 bq_complex_element_t *mag = (bq_complex_element_t *)
324 alloca(count * sizeof(bq_complex_element_t));
325 bq_complex_element_t *phase = (bq_complex_element_t *)
326 alloca(count * sizeof(bq_complex_element_t));
327 bq_complex_element_t *magphase[] = { mag, phase };
328
329 v_deinterleave(magphase, src, 2, count);
330 v_polar_to_cartesian(dst, mag, phase, count);
331 }
332
333 #else
334
335 // without a vector library, better avoid the deinterleave step
336
337 void
338 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
339 const bq_complex_element_t *const BQ_R__ src,
340 const int count)
341 {
342 bq_complex_element_t mag, phase;
343 int idx = 0;
344 for (int i = 0; i < count; ++i) {
345 mag = src[idx++];
346 phase = src[idx++];
347 dst[i] = c_phasor(phase);
348 dst[i].re *= mag;
349 dst[i].im *= mag;
350 }
351 }
352
353 #endif
354
355 void
356 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
357 const int count)
358 {
359 // Not ideal
360 bq_complex_element_t mag, phase;
361 int ii = 0, io = 0;
362 for (int i = 0; i < count; ++i) {
363 mag = srcdst[ii++];
364 phase = srcdst[ii++];
365 bq_complex_t p = c_phasor(phase);
366 srcdst[io++] = mag * p.re;
367 srcdst[io++] = mag * p.im;
368 }
369 }
370
371 #endif
372
373 }