comparison bqvec/src/VectorOpsComplex.cpp @ 372:af71cbdab621 tip

Update bqvec code
author Chris Cannam
date Tue, 19 Nov 2019 10:13:32 +0000
parents 5d0a2ebb4d17
children
comparison
equal deleted inserted replaced
371:426ce52ef096 372:af71cbdab621
4 bqvec 4 bqvec
5 5
6 A small library for vector arithmetic and allocation in C++ using 6 A small library for vector arithmetic and allocation in C++ using
7 raw C pointer arrays. 7 raw C pointer arrays.
8 8
9 Copyright 2007-2015 Particular Programs Ltd. 9 Copyright 2007-2016 Particular Programs Ltd.
10 10
11 Permission is hereby granted, free of charge, to any person 11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation 12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without 13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use, copy, 14 restriction, including without limitation the rights to use, copy,
31 Particular Programs Ltd shall not be used in advertising or 31 Particular Programs Ltd shall not be used in advertising or
32 otherwise to promote the sale, use or other dealings in this 32 otherwise to promote the sale, use or other dealings in this
33 Software without prior written authorization. 33 Software without prior written authorization.
34 */ 34 */
35 35
36 #include "bqvec/VectorOpsComplex.h" 36 #include "VectorOpsComplex.h"
37 37
38 #include <cassert> 38 #include <cassert>
39
40 #ifdef __MSVC__
41 #include <malloc.h>
42 #define alloca _alloca
43 #endif
44 39
45 #if defined USE_POMMIER_MATHFUN 40 #if defined USE_POMMIER_MATHFUN
46 #if defined __ARMEL__ 41 #if defined __ARMEL__
47 #include "pommier/neon_mathfun.h" 42 #include "pommier/neon_mathfun.h"
48 #else 43 #else
49 #include "pommier/sse_mathfun.h" 44 #include "pommier/sse_mathfun.h"
50 #endif 45 #endif
51 #endif 46 #endif
52 47
53 #ifdef __MSVC__ 48 #if defined(_MSC_VER) || defined(WIN32)
54 #include <malloc.h> 49 #include <malloc.h>
50 #ifndef alloca
55 #define alloca _alloca 51 #define alloca _alloca
56 #endif 52 #endif
53 #else
54 #include <alloca.h>
55 #endif
56
57 #include <iostream>
58
59 using namespace std;
57 60
58 namespace breakfastquay { 61 namespace breakfastquay {
59 62
60 #ifdef USE_APPROXIMATE_ATAN2 63 #ifdef USE_APPROXIMATE_ATAN2
61 float approximate_atan2f(float real, float imag) 64 float approximate_atan2f(float real, float imag)
84 } else { 87 } else {
85 atan = pi2 - z / (z * z + 0.28f); 88 atan = pi2 - z / (z * z + 0.28f);
86 if (imag < 0.f) atan -= pi; 89 if (imag < 0.f) atan -= pi;
87 } 90 }
88 } 91 }
92
93 return atan;
89 } 94 }
90 #endif 95 #endif
91 96
92 #if defined USE_POMMIER_MATHFUN 97 #if defined USE_POMMIER_MATHFUN
93 98
214 219
215 #endif 220 #endif
216 221
217 #ifndef NO_COMPLEX_TYPES 222 #ifndef NO_COMPLEX_TYPES
218 223
219 #if defined HAVE_IPP
220
221 void 224 void
222 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, 225 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
223 const bq_complex_element_t *const BQ_R__ mag, 226 const bq_complex_element_t *const BQ_R__ mag,
224 const bq_complex_element_t *const BQ_R__ phase, 227 const bq_complex_element_t *const BQ_R__ phase,
225 const int count) 228 const int count)
226 { 229 {
227 if (sizeof(bq_complex_element_t) == sizeof(float)) { 230 if (sizeof(bq_complex_element_t) == sizeof(float)) {
228 ippsPolarToCart_32fc((const float *)mag, (const float *)phase, 231 v_polar_to_cartesian_interleaved((float *)dst,
229 (Ipp32fc *)dst, count); 232 (const float *)mag,
233 (const float *)phase,
234 count);
230 } else { 235 } else {
231 ippsPolarToCart_64fc((const double *)mag, (const double *)phase, 236 v_polar_to_cartesian_interleaved((double *)dst,
232 (Ipp64fc *)dst, count); 237 (const double *)mag,
233 } 238 (const double *)phase,
234 } 239 count);
235 240 }
236 #elif defined HAVE_VDSP 241 }
237 242
238 void 243 void
239 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, 244 v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag,
240 const bq_complex_element_t *const BQ_R__ mag, 245 bq_complex_element_t *const BQ_R__ phase,
241 const bq_complex_element_t *const BQ_R__ phase, 246 const bq_complex_t *const BQ_R__ src,
242 const int count) 247 const int count)
243 { 248 {
244 bq_complex_element_t *sc = (bq_complex_element_t *)
245 alloca(count * 2 * sizeof(bq_complex_element_t));
246
247 if (sizeof(bq_complex_element_t) == sizeof(float)) { 249 if (sizeof(bq_complex_element_t) == sizeof(float)) {
248 vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count); 250 v_cartesian_interleaved_to_polar((float *)mag,
251 (float *)phase,
252 (const float *)src,
253 count);
249 } else { 254 } else {
250 vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count); 255 v_cartesian_interleaved_to_polar((double *)mag,
251 } 256 (double *)phase,
252 257 (const double *)src,
253 int sini = 0; 258 count);
254 int cosi = count; 259 }
255 260 }
256 for (int i = 0; i < count; ++i) { 261
257 dst[i].re = mag[i] * sc[cosi++]; 262 void
258 dst[i].im = mag[i] * sc[sini++]; 263 v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag,
259 } 264 const bq_complex_t *const BQ_R__ src,
260 } 265 const int count)
261 266 {
262 #else 267 if (sizeof(bq_complex_element_t) == sizeof(float)) {
263 268 v_cartesian_interleaved_to_magnitudes((float *)mag,
264 void 269 (const float *)src,
265 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, 270 count);
266 const bq_complex_element_t *const BQ_R__ mag, 271 } else {
267 const bq_complex_element_t *const BQ_R__ phase, 272 v_cartesian_interleaved_to_magnitudes((double *)mag,
268 const int count) 273 (const double *)src,
269 { 274 count);
270 for (int i = 0; i < count; ++i) { 275 }
271 dst[i] = c_phasor(phase[i]); 276 }
272 }
273 for (int i = 0; i < count; ++i) {
274 dst[i].re *= mag[i];
275 dst[i].im *= mag[i];
276 }
277 }
278
279 #endif
280 277
281 #if defined USE_POMMIER_MATHFUN 278 #if defined USE_POMMIER_MATHFUN
282 279
283 //!!! further tests reqd. This is only single precision but it seems 280 //!!! further tests reqd. This is only single precision but it seems
284 //!!! to be much faster than normal math library sincos. The comments 281 //!!! to be much faster than normal math library sincos. The comments
354 351
355 void 352 void
356 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst, 353 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
357 const int count) 354 const int count)
358 { 355 {
359 // Not ideal
360 bq_complex_element_t mag, phase; 356 bq_complex_element_t mag, phase;
361 int ii = 0, io = 0; 357 int ii = 0, io = 0;
362 for (int i = 0; i < count; ++i) { 358 for (int i = 0; i < count; ++i) {
363 mag = srcdst[ii++]; 359 mag = srcdst[ii++];
364 phase = srcdst[ii++]; 360 phase = srcdst[ii++];
366 srcdst[io++] = mag * p.re; 362 srcdst[io++] = mag * p.re;
367 srcdst[io++] = mag * p.im; 363 srcdst[io++] = mag * p.im;
368 } 364 }
369 } 365 }
370 366
371 #endif 367 #endif // NO_COMPLEX_TYPES
372 368
373 } 369 }