annotate bqvec/src/VectorOpsComplex.cpp @ 372:af71cbdab621 tip

Update bqvec code
author Chris Cannam
date Tue, 19 Nov 2019 10:13:32 +0000
parents 5d0a2ebb4d17
children
rev   line source
Chris@366 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@366 2
Chris@366 3 /*
Chris@366 4 bqvec
Chris@366 5
Chris@366 6 A small library for vector arithmetic and allocation in C++ using
Chris@366 7 raw C pointer arrays.
Chris@366 8
Chris@372 9 Copyright 2007-2016 Particular Programs Ltd.
Chris@366 10
Chris@366 11 Permission is hereby granted, free of charge, to any person
Chris@366 12 obtaining a copy of this software and associated documentation
Chris@366 13 files (the "Software"), to deal in the Software without
Chris@366 14 restriction, including without limitation the rights to use, copy,
Chris@366 15 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@366 16 of the Software, and to permit persons to whom the Software is
Chris@366 17 furnished to do so, subject to the following conditions:
Chris@366 18
Chris@366 19 The above copyright notice and this permission notice shall be
Chris@366 20 included in all copies or substantial portions of the Software.
Chris@366 21
Chris@366 22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@366 23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@366 24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@366 25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@366 26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@366 27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@366 28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@366 29
Chris@366 30 Except as contained in this notice, the names of Chris Cannam and
Chris@366 31 Particular Programs Ltd shall not be used in advertising or
Chris@366 32 otherwise to promote the sale, use or other dealings in this
Chris@366 33 Software without prior written authorization.
Chris@366 34 */
Chris@366 35
Chris@372 36 #include "VectorOpsComplex.h"
Chris@366 37
Chris@366 38 #include <cassert>
Chris@366 39
Chris@366 40 #if defined USE_POMMIER_MATHFUN
Chris@366 41 #if defined __ARMEL__
Chris@366 42 #include "pommier/neon_mathfun.h"
Chris@366 43 #else
Chris@366 44 #include "pommier/sse_mathfun.h"
Chris@366 45 #endif
Chris@366 46 #endif
Chris@366 47
Chris@372 48 #if defined(_MSC_VER) || defined(WIN32)
Chris@366 49 #include <malloc.h>
Chris@372 50 #ifndef alloca
Chris@366 51 #define alloca _alloca
Chris@366 52 #endif
Chris@372 53 #else
Chris@372 54 #include <alloca.h>
Chris@372 55 #endif
Chris@372 56
Chris@372 57 #include <iostream>
Chris@372 58
Chris@372 59 using namespace std;
Chris@366 60
Chris@366 61 namespace breakfastquay {
Chris@366 62
Chris@366 63 #ifdef USE_APPROXIMATE_ATAN2
Chris@366 64 float approximate_atan2f(float real, float imag)
Chris@366 65 {
Chris@366 66 static const float pi = M_PI;
Chris@366 67 static const float pi2 = M_PI / 2;
Chris@366 68
Chris@366 69 float atan;
Chris@366 70
Chris@366 71 if (real == 0.f) {
Chris@366 72
Chris@366 73 if (imag > 0.0f) atan = pi2;
Chris@366 74 else if (imag == 0.0f) atan = 0.0f;
Chris@366 75 else atan = -pi2;
Chris@366 76
Chris@366 77 } else {
Chris@366 78
Chris@366 79 float z = imag/real;
Chris@366 80
Chris@366 81 if (fabsf(z) < 1.f) {
Chris@366 82 atan = z / (1.f + 0.28f * z * z);
Chris@366 83 if (real < 0.f) {
Chris@366 84 if (imag < 0.f) atan -= pi;
Chris@366 85 else atan += pi;
Chris@366 86 }
Chris@366 87 } else {
Chris@366 88 atan = pi2 - z / (z * z + 0.28f);
Chris@366 89 if (imag < 0.f) atan -= pi;
Chris@366 90 }
Chris@366 91 }
Chris@372 92
Chris@372 93 return atan;
Chris@366 94 }
Chris@366 95 #endif
Chris@366 96
Chris@366 97 #if defined USE_POMMIER_MATHFUN
Chris@366 98
Chris@366 99 #ifdef __ARMEL__
Chris@366 100 typedef union {
Chris@366 101 float f[4];
Chris@366 102 int i[4];
Chris@366 103 v4sf v;
Chris@366 104 } V4SF;
Chris@366 105 #else
Chris@366 106 typedef ALIGN16_BEG union {
Chris@366 107 float f[4];
Chris@366 108 int i[4];
Chris@366 109 v4sf v;
Chris@366 110 } ALIGN16_END V4SF;
Chris@366 111 #endif
Chris@366 112
Chris@366 113 void
Chris@366 114 v_polar_to_cartesian_pommier(float *const BQ_R__ real,
Chris@366 115 float *const BQ_R__ imag,
Chris@366 116 const float *const BQ_R__ mag,
Chris@366 117 const float *const BQ_R__ phase,
Chris@366 118 const int count)
Chris@366 119 {
Chris@366 120 int idx = 0, tidx = 0;
Chris@366 121 int i = 0;
Chris@366 122
Chris@366 123 for (int i = 0; i + 4 < count; i += 4) {
Chris@366 124
Chris@366 125 V4SF fmag, fphase, fre, fim;
Chris@366 126
Chris@366 127 for (int j = 0; j < 3; ++j) {
Chris@366 128 fmag.f[j] = mag[idx];
Chris@366 129 fphase.f[j] = phase[idx++];
Chris@366 130 }
Chris@366 131
Chris@366 132 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 133
Chris@366 134 for (int j = 0; j < 3; ++j) {
Chris@366 135 real[tidx] = fre.f[j] * fmag.f[j];
Chris@366 136 imag[tidx++] = fim.f[j] * fmag.f[j];
Chris@366 137 }
Chris@366 138 }
Chris@366 139
Chris@366 140 while (i < count) {
Chris@366 141 float re, im;
Chris@366 142 c_phasor(&re, &im, phase[i]);
Chris@366 143 real[tidx] = re * mag[i];
Chris@366 144 imag[tidx++] = im * mag[i];
Chris@366 145 ++i;
Chris@366 146 }
Chris@366 147 }
Chris@366 148
Chris@366 149 void
Chris@366 150 v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst,
Chris@366 151 const int count)
Chris@366 152 {
Chris@366 153 int i;
Chris@366 154 int idx = 0, tidx = 0;
Chris@366 155
Chris@366 156 for (i = 0; i + 4 < count; i += 4) {
Chris@366 157
Chris@366 158 V4SF fmag, fphase, fre, fim;
Chris@366 159
Chris@366 160 for (int j = 0; j < 3; ++j) {
Chris@366 161 fmag.f[j] = srcdst[idx++];
Chris@366 162 fphase.f[j] = srcdst[idx++];
Chris@366 163 }
Chris@366 164
Chris@366 165 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 166
Chris@366 167 for (int j = 0; j < 3; ++j) {
Chris@366 168 srcdst[tidx++] = fre.f[j] * fmag.f[j];
Chris@366 169 srcdst[tidx++] = fim.f[j] * fmag.f[j];
Chris@366 170 }
Chris@366 171 }
Chris@366 172
Chris@366 173 while (i < count) {
Chris@366 174 float real, imag;
Chris@366 175 float mag = srcdst[idx++];
Chris@366 176 float phase = srcdst[idx++];
Chris@366 177 c_phasor(&real, &imag, phase);
Chris@366 178 srcdst[tidx++] = real * mag;
Chris@366 179 srcdst[tidx++] = imag * mag;
Chris@366 180 ++i;
Chris@366 181 }
Chris@366 182 }
Chris@366 183
Chris@366 184 void
Chris@366 185 v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst,
Chris@366 186 const float *const BQ_R__ mag,
Chris@366 187 const float *const BQ_R__ phase,
Chris@366 188 const int count)
Chris@366 189 {
Chris@366 190 int i;
Chris@366 191 int idx = 0, tidx = 0;
Chris@366 192
Chris@366 193 for (i = 0; i + 4 <= count; i += 4) {
Chris@366 194
Chris@366 195 V4SF fmag, fphase, fre, fim;
Chris@366 196
Chris@366 197 for (int j = 0; j < 3; ++j) {
Chris@366 198 fmag.f[j] = mag[idx];
Chris@366 199 fphase.f[j] = phase[idx];
Chris@366 200 ++idx;
Chris@366 201 }
Chris@366 202
Chris@366 203 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 204
Chris@366 205 for (int j = 0; j < 3; ++j) {
Chris@366 206 dst[tidx++] = fre.f[j] * fmag.f[j];
Chris@366 207 dst[tidx++] = fim.f[j] * fmag.f[j];
Chris@366 208 }
Chris@366 209 }
Chris@366 210
Chris@366 211 while (i < count) {
Chris@366 212 float real, imag;
Chris@366 213 c_phasor(&real, &imag, phase[i]);
Chris@366 214 dst[tidx++] = real * mag[i];
Chris@366 215 dst[tidx++] = imag * mag[i];
Chris@366 216 ++i;
Chris@366 217 }
Chris@366 218 }
Chris@366 219
Chris@366 220 #endif
Chris@366 221
Chris@366 222 #ifndef NO_COMPLEX_TYPES
Chris@366 223
Chris@366 224 void
Chris@366 225 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 226 const bq_complex_element_t *const BQ_R__ mag,
Chris@366 227 const bq_complex_element_t *const BQ_R__ phase,
Chris@366 228 const int count)
Chris@366 229 {
Chris@366 230 if (sizeof(bq_complex_element_t) == sizeof(float)) {
Chris@372 231 v_polar_to_cartesian_interleaved((float *)dst,
Chris@372 232 (const float *)mag,
Chris@372 233 (const float *)phase,
Chris@372 234 count);
Chris@366 235 } else {
Chris@372 236 v_polar_to_cartesian_interleaved((double *)dst,
Chris@372 237 (const double *)mag,
Chris@372 238 (const double *)phase,
Chris@372 239 count);
Chris@366 240 }
Chris@366 241 }
Chris@366 242
Chris@372 243 void
Chris@372 244 v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag,
Chris@372 245 bq_complex_element_t *const BQ_R__ phase,
Chris@372 246 const bq_complex_t *const BQ_R__ src,
Chris@372 247 const int count)
Chris@372 248 {
Chris@372 249 if (sizeof(bq_complex_element_t) == sizeof(float)) {
Chris@372 250 v_cartesian_interleaved_to_polar((float *)mag,
Chris@372 251 (float *)phase,
Chris@372 252 (const float *)src,
Chris@372 253 count);
Chris@372 254 } else {
Chris@372 255 v_cartesian_interleaved_to_polar((double *)mag,
Chris@372 256 (double *)phase,
Chris@372 257 (const double *)src,
Chris@372 258 count);
Chris@372 259 }
Chris@372 260 }
Chris@366 261
Chris@366 262 void
Chris@372 263 v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag,
Chris@372 264 const bq_complex_t *const BQ_R__ src,
Chris@372 265 const int count)
Chris@366 266 {
Chris@366 267 if (sizeof(bq_complex_element_t) == sizeof(float)) {
Chris@372 268 v_cartesian_interleaved_to_magnitudes((float *)mag,
Chris@372 269 (const float *)src,
Chris@372 270 count);
Chris@366 271 } else {
Chris@372 272 v_cartesian_interleaved_to_magnitudes((double *)mag,
Chris@372 273 (const double *)src,
Chris@372 274 count);
Chris@366 275 }
Chris@372 276 }
Chris@366 277
Chris@366 278 #if defined USE_POMMIER_MATHFUN
Chris@366 279
Chris@366 280 //!!! further tests reqd. This is only single precision but it seems
Chris@366 281 //!!! to be much faster than normal math library sincos. The comments
Chris@366 282 //!!! note that precision suffers for high arguments to sincos though,
Chris@366 283 //!!! and that is probably a common case for us
Chris@366 284
Chris@366 285 void
Chris@366 286 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 287 const bq_complex_element_t *const BQ_R__ src,
Chris@366 288 const int count)
Chris@366 289 {
Chris@366 290 int idx = 0, tidx = 0;
Chris@366 291
Chris@366 292 for (int i = 0; i < count; i += 4) {
Chris@366 293
Chris@366 294 V4SF fmag, fphase, fre, fim;
Chris@366 295
Chris@366 296 for (int j = 0; j < 3; ++j) {
Chris@366 297 fmag.f[j] = src[idx++];
Chris@366 298 fphase.f[j] = src[idx++];
Chris@366 299 }
Chris@366 300
Chris@366 301 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 302
Chris@366 303 for (int j = 0; j < 3; ++j) {
Chris@366 304 dst[tidx].re = fre.f[j] * fmag.f[j];
Chris@366 305 dst[tidx++].im = fim.f[j] * fmag.f[j];
Chris@366 306 }
Chris@366 307 }
Chris@366 308 }
Chris@366 309
Chris@366 310 #elif (defined HAVE_IPP || defined HAVE_VDSP)
Chris@366 311
Chris@366 312 // with a vector library, it should be faster to deinterleave and call
Chris@366 313 // the basic fn
Chris@366 314
Chris@366 315 void
Chris@366 316 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 317 const bq_complex_element_t *const BQ_R__ src,
Chris@366 318 const int count)
Chris@366 319 {
Chris@366 320 bq_complex_element_t *mag = (bq_complex_element_t *)
Chris@366 321 alloca(count * sizeof(bq_complex_element_t));
Chris@366 322 bq_complex_element_t *phase = (bq_complex_element_t *)
Chris@366 323 alloca(count * sizeof(bq_complex_element_t));
Chris@366 324 bq_complex_element_t *magphase[] = { mag, phase };
Chris@366 325
Chris@366 326 v_deinterleave(magphase, src, 2, count);
Chris@366 327 v_polar_to_cartesian(dst, mag, phase, count);
Chris@366 328 }
Chris@366 329
Chris@366 330 #else
Chris@366 331
Chris@366 332 // without a vector library, better avoid the deinterleave step
Chris@366 333
Chris@366 334 void
Chris@366 335 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 336 const bq_complex_element_t *const BQ_R__ src,
Chris@366 337 const int count)
Chris@366 338 {
Chris@366 339 bq_complex_element_t mag, phase;
Chris@366 340 int idx = 0;
Chris@366 341 for (int i = 0; i < count; ++i) {
Chris@366 342 mag = src[idx++];
Chris@366 343 phase = src[idx++];
Chris@366 344 dst[i] = c_phasor(phase);
Chris@366 345 dst[i].re *= mag;
Chris@366 346 dst[i].im *= mag;
Chris@366 347 }
Chris@366 348 }
Chris@366 349
Chris@366 350 #endif
Chris@366 351
Chris@366 352 void
Chris@366 353 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
Chris@366 354 const int count)
Chris@366 355 {
Chris@366 356 bq_complex_element_t mag, phase;
Chris@366 357 int ii = 0, io = 0;
Chris@366 358 for (int i = 0; i < count; ++i) {
Chris@366 359 mag = srcdst[ii++];
Chris@366 360 phase = srcdst[ii++];
Chris@366 361 bq_complex_t p = c_phasor(phase);
Chris@366 362 srcdst[io++] = mag * p.re;
Chris@366 363 srcdst[io++] = mag * p.im;
Chris@366 364 }
Chris@366 365 }
Chris@366 366
Chris@372 367 #endif // NO_COMPLEX_TYPES
Chris@366 368
Chris@366 369 }