annotate bqvec/src/VectorOpsComplex.cpp @ 369:ef35549c1c56

Whoops, we need the Vamp SDK library to build the plugin!
author Chris Cannam
date Wed, 03 Aug 2016 10:47:26 +0100
parents 5d0a2ebb4d17
children af71cbdab621
rev   line source
Chris@366 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@366 2
Chris@366 3 /*
Chris@366 4 bqvec
Chris@366 5
Chris@366 6 A small library for vector arithmetic and allocation in C++ using
Chris@366 7 raw C pointer arrays.
Chris@366 8
Chris@366 9 Copyright 2007-2015 Particular Programs Ltd.
Chris@366 10
Chris@366 11 Permission is hereby granted, free of charge, to any person
Chris@366 12 obtaining a copy of this software and associated documentation
Chris@366 13 files (the "Software"), to deal in the Software without
Chris@366 14 restriction, including without limitation the rights to use, copy,
Chris@366 15 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@366 16 of the Software, and to permit persons to whom the Software is
Chris@366 17 furnished to do so, subject to the following conditions:
Chris@366 18
Chris@366 19 The above copyright notice and this permission notice shall be
Chris@366 20 included in all copies or substantial portions of the Software.
Chris@366 21
Chris@366 22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@366 23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@366 24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@366 25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@366 26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@366 27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@366 28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@366 29
Chris@366 30 Except as contained in this notice, the names of Chris Cannam and
Chris@366 31 Particular Programs Ltd shall not be used in advertising or
Chris@366 32 otherwise to promote the sale, use or other dealings in this
Chris@366 33 Software without prior written authorization.
Chris@366 34 */
Chris@366 35
Chris@366 36 #include "bqvec/VectorOpsComplex.h"
Chris@366 37
Chris@366 38 #include <cassert>
Chris@366 39
Chris@366 40 #ifdef __MSVC__
Chris@366 41 #include <malloc.h>
Chris@366 42 #define alloca _alloca
Chris@366 43 #endif
Chris@366 44
Chris@366 45 #if defined USE_POMMIER_MATHFUN
Chris@366 46 #if defined __ARMEL__
Chris@366 47 #include "pommier/neon_mathfun.h"
Chris@366 48 #else
Chris@366 49 #include "pommier/sse_mathfun.h"
Chris@366 50 #endif
Chris@366 51 #endif
Chris@366 52
Chris@366 53 #ifdef __MSVC__
Chris@366 54 #include <malloc.h>
Chris@366 55 #define alloca _alloca
Chris@366 56 #endif
Chris@366 57
Chris@366 58 namespace breakfastquay {
Chris@366 59
Chris@366 60 #ifdef USE_APPROXIMATE_ATAN2
Chris@366 61 float approximate_atan2f(float real, float imag)
Chris@366 62 {
Chris@366 63 static const float pi = M_PI;
Chris@366 64 static const float pi2 = M_PI / 2;
Chris@366 65
Chris@366 66 float atan;
Chris@366 67
Chris@366 68 if (real == 0.f) {
Chris@366 69
Chris@366 70 if (imag > 0.0f) atan = pi2;
Chris@366 71 else if (imag == 0.0f) atan = 0.0f;
Chris@366 72 else atan = -pi2;
Chris@366 73
Chris@366 74 } else {
Chris@366 75
Chris@366 76 float z = imag/real;
Chris@366 77
Chris@366 78 if (fabsf(z) < 1.f) {
Chris@366 79 atan = z / (1.f + 0.28f * z * z);
Chris@366 80 if (real < 0.f) {
Chris@366 81 if (imag < 0.f) atan -= pi;
Chris@366 82 else atan += pi;
Chris@366 83 }
Chris@366 84 } else {
Chris@366 85 atan = pi2 - z / (z * z + 0.28f);
Chris@366 86 if (imag < 0.f) atan -= pi;
Chris@366 87 }
Chris@366 88 }
Chris@366 89 }
Chris@366 90 #endif
Chris@366 91
Chris@366 92 #if defined USE_POMMIER_MATHFUN
Chris@366 93
Chris@366 94 #ifdef __ARMEL__
Chris@366 95 typedef union {
Chris@366 96 float f[4];
Chris@366 97 int i[4];
Chris@366 98 v4sf v;
Chris@366 99 } V4SF;
Chris@366 100 #else
Chris@366 101 typedef ALIGN16_BEG union {
Chris@366 102 float f[4];
Chris@366 103 int i[4];
Chris@366 104 v4sf v;
Chris@366 105 } ALIGN16_END V4SF;
Chris@366 106 #endif
Chris@366 107
Chris@366 108 void
Chris@366 109 v_polar_to_cartesian_pommier(float *const BQ_R__ real,
Chris@366 110 float *const BQ_R__ imag,
Chris@366 111 const float *const BQ_R__ mag,
Chris@366 112 const float *const BQ_R__ phase,
Chris@366 113 const int count)
Chris@366 114 {
Chris@366 115 int idx = 0, tidx = 0;
Chris@366 116 int i = 0;
Chris@366 117
Chris@366 118 for (int i = 0; i + 4 < count; i += 4) {
Chris@366 119
Chris@366 120 V4SF fmag, fphase, fre, fim;
Chris@366 121
Chris@366 122 for (int j = 0; j < 3; ++j) {
Chris@366 123 fmag.f[j] = mag[idx];
Chris@366 124 fphase.f[j] = phase[idx++];
Chris@366 125 }
Chris@366 126
Chris@366 127 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 128
Chris@366 129 for (int j = 0; j < 3; ++j) {
Chris@366 130 real[tidx] = fre.f[j] * fmag.f[j];
Chris@366 131 imag[tidx++] = fim.f[j] * fmag.f[j];
Chris@366 132 }
Chris@366 133 }
Chris@366 134
Chris@366 135 while (i < count) {
Chris@366 136 float re, im;
Chris@366 137 c_phasor(&re, &im, phase[i]);
Chris@366 138 real[tidx] = re * mag[i];
Chris@366 139 imag[tidx++] = im * mag[i];
Chris@366 140 ++i;
Chris@366 141 }
Chris@366 142 }
Chris@366 143
Chris@366 144 void
Chris@366 145 v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst,
Chris@366 146 const int count)
Chris@366 147 {
Chris@366 148 int i;
Chris@366 149 int idx = 0, tidx = 0;
Chris@366 150
Chris@366 151 for (i = 0; i + 4 < count; i += 4) {
Chris@366 152
Chris@366 153 V4SF fmag, fphase, fre, fim;
Chris@366 154
Chris@366 155 for (int j = 0; j < 3; ++j) {
Chris@366 156 fmag.f[j] = srcdst[idx++];
Chris@366 157 fphase.f[j] = srcdst[idx++];
Chris@366 158 }
Chris@366 159
Chris@366 160 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 161
Chris@366 162 for (int j = 0; j < 3; ++j) {
Chris@366 163 srcdst[tidx++] = fre.f[j] * fmag.f[j];
Chris@366 164 srcdst[tidx++] = fim.f[j] * fmag.f[j];
Chris@366 165 }
Chris@366 166 }
Chris@366 167
Chris@366 168 while (i < count) {
Chris@366 169 float real, imag;
Chris@366 170 float mag = srcdst[idx++];
Chris@366 171 float phase = srcdst[idx++];
Chris@366 172 c_phasor(&real, &imag, phase);
Chris@366 173 srcdst[tidx++] = real * mag;
Chris@366 174 srcdst[tidx++] = imag * mag;
Chris@366 175 ++i;
Chris@366 176 }
Chris@366 177 }
Chris@366 178
Chris@366 179 void
Chris@366 180 v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst,
Chris@366 181 const float *const BQ_R__ mag,
Chris@366 182 const float *const BQ_R__ phase,
Chris@366 183 const int count)
Chris@366 184 {
Chris@366 185 int i;
Chris@366 186 int idx = 0, tidx = 0;
Chris@366 187
Chris@366 188 for (i = 0; i + 4 <= count; i += 4) {
Chris@366 189
Chris@366 190 V4SF fmag, fphase, fre, fim;
Chris@366 191
Chris@366 192 for (int j = 0; j < 3; ++j) {
Chris@366 193 fmag.f[j] = mag[idx];
Chris@366 194 fphase.f[j] = phase[idx];
Chris@366 195 ++idx;
Chris@366 196 }
Chris@366 197
Chris@366 198 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 199
Chris@366 200 for (int j = 0; j < 3; ++j) {
Chris@366 201 dst[tidx++] = fre.f[j] * fmag.f[j];
Chris@366 202 dst[tidx++] = fim.f[j] * fmag.f[j];
Chris@366 203 }
Chris@366 204 }
Chris@366 205
Chris@366 206 while (i < count) {
Chris@366 207 float real, imag;
Chris@366 208 c_phasor(&real, &imag, phase[i]);
Chris@366 209 dst[tidx++] = real * mag[i];
Chris@366 210 dst[tidx++] = imag * mag[i];
Chris@366 211 ++i;
Chris@366 212 }
Chris@366 213 }
Chris@366 214
Chris@366 215 #endif
Chris@366 216
Chris@366 217 #ifndef NO_COMPLEX_TYPES
Chris@366 218
Chris@366 219 #if defined HAVE_IPP
Chris@366 220
Chris@366 221 void
Chris@366 222 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 223 const bq_complex_element_t *const BQ_R__ mag,
Chris@366 224 const bq_complex_element_t *const BQ_R__ phase,
Chris@366 225 const int count)
Chris@366 226 {
Chris@366 227 if (sizeof(bq_complex_element_t) == sizeof(float)) {
Chris@366 228 ippsPolarToCart_32fc((const float *)mag, (const float *)phase,
Chris@366 229 (Ipp32fc *)dst, count);
Chris@366 230 } else {
Chris@366 231 ippsPolarToCart_64fc((const double *)mag, (const double *)phase,
Chris@366 232 (Ipp64fc *)dst, count);
Chris@366 233 }
Chris@366 234 }
Chris@366 235
Chris@366 236 #elif defined HAVE_VDSP
Chris@366 237
Chris@366 238 void
Chris@366 239 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 240 const bq_complex_element_t *const BQ_R__ mag,
Chris@366 241 const bq_complex_element_t *const BQ_R__ phase,
Chris@366 242 const int count)
Chris@366 243 {
Chris@366 244 bq_complex_element_t *sc = (bq_complex_element_t *)
Chris@366 245 alloca(count * 2 * sizeof(bq_complex_element_t));
Chris@366 246
Chris@366 247 if (sizeof(bq_complex_element_t) == sizeof(float)) {
Chris@366 248 vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count);
Chris@366 249 } else {
Chris@366 250 vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count);
Chris@366 251 }
Chris@366 252
Chris@366 253 int sini = 0;
Chris@366 254 int cosi = count;
Chris@366 255
Chris@366 256 for (int i = 0; i < count; ++i) {
Chris@366 257 dst[i].re = mag[i] * sc[cosi++];
Chris@366 258 dst[i].im = mag[i] * sc[sini++];
Chris@366 259 }
Chris@366 260 }
Chris@366 261
Chris@366 262 #else
Chris@366 263
Chris@366 264 void
Chris@366 265 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 266 const bq_complex_element_t *const BQ_R__ mag,
Chris@366 267 const bq_complex_element_t *const BQ_R__ phase,
Chris@366 268 const int count)
Chris@366 269 {
Chris@366 270 for (int i = 0; i < count; ++i) {
Chris@366 271 dst[i] = c_phasor(phase[i]);
Chris@366 272 }
Chris@366 273 for (int i = 0; i < count; ++i) {
Chris@366 274 dst[i].re *= mag[i];
Chris@366 275 dst[i].im *= mag[i];
Chris@366 276 }
Chris@366 277 }
Chris@366 278
Chris@366 279 #endif
Chris@366 280
Chris@366 281 #if defined USE_POMMIER_MATHFUN
Chris@366 282
Chris@366 283 //!!! further tests reqd. This is only single precision but it seems
Chris@366 284 //!!! to be much faster than normal math library sincos. The comments
Chris@366 285 //!!! note that precision suffers for high arguments to sincos though,
Chris@366 286 //!!! and that is probably a common case for us
Chris@366 287
Chris@366 288 void
Chris@366 289 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 290 const bq_complex_element_t *const BQ_R__ src,
Chris@366 291 const int count)
Chris@366 292 {
Chris@366 293 int idx = 0, tidx = 0;
Chris@366 294
Chris@366 295 for (int i = 0; i < count; i += 4) {
Chris@366 296
Chris@366 297 V4SF fmag, fphase, fre, fim;
Chris@366 298
Chris@366 299 for (int j = 0; j < 3; ++j) {
Chris@366 300 fmag.f[j] = src[idx++];
Chris@366 301 fphase.f[j] = src[idx++];
Chris@366 302 }
Chris@366 303
Chris@366 304 sincos_ps(fphase.v, &fim.v, &fre.v);
Chris@366 305
Chris@366 306 for (int j = 0; j < 3; ++j) {
Chris@366 307 dst[tidx].re = fre.f[j] * fmag.f[j];
Chris@366 308 dst[tidx++].im = fim.f[j] * fmag.f[j];
Chris@366 309 }
Chris@366 310 }
Chris@366 311 }
Chris@366 312
Chris@366 313 #elif (defined HAVE_IPP || defined HAVE_VDSP)
Chris@366 314
Chris@366 315 // with a vector library, it should be faster to deinterleave and call
Chris@366 316 // the basic fn
Chris@366 317
Chris@366 318 void
Chris@366 319 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 320 const bq_complex_element_t *const BQ_R__ src,
Chris@366 321 const int count)
Chris@366 322 {
Chris@366 323 bq_complex_element_t *mag = (bq_complex_element_t *)
Chris@366 324 alloca(count * sizeof(bq_complex_element_t));
Chris@366 325 bq_complex_element_t *phase = (bq_complex_element_t *)
Chris@366 326 alloca(count * sizeof(bq_complex_element_t));
Chris@366 327 bq_complex_element_t *magphase[] = { mag, phase };
Chris@366 328
Chris@366 329 v_deinterleave(magphase, src, 2, count);
Chris@366 330 v_polar_to_cartesian(dst, mag, phase, count);
Chris@366 331 }
Chris@366 332
Chris@366 333 #else
Chris@366 334
Chris@366 335 // without a vector library, better avoid the deinterleave step
Chris@366 336
Chris@366 337 void
Chris@366 338 v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
Chris@366 339 const bq_complex_element_t *const BQ_R__ src,
Chris@366 340 const int count)
Chris@366 341 {
Chris@366 342 bq_complex_element_t mag, phase;
Chris@366 343 int idx = 0;
Chris@366 344 for (int i = 0; i < count; ++i) {
Chris@366 345 mag = src[idx++];
Chris@366 346 phase = src[idx++];
Chris@366 347 dst[i] = c_phasor(phase);
Chris@366 348 dst[i].re *= mag;
Chris@366 349 dst[i].im *= mag;
Chris@366 350 }
Chris@366 351 }
Chris@366 352
Chris@366 353 #endif
Chris@366 354
Chris@366 355 void
Chris@366 356 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
Chris@366 357 const int count)
Chris@366 358 {
Chris@366 359 // Not ideal
Chris@366 360 bq_complex_element_t mag, phase;
Chris@366 361 int ii = 0, io = 0;
Chris@366 362 for (int i = 0; i < count; ++i) {
Chris@366 363 mag = srcdst[ii++];
Chris@366 364 phase = srcdst[ii++];
Chris@366 365 bq_complex_t p = c_phasor(phase);
Chris@366 366 srcdst[io++] = mag * p.re;
Chris@366 367 srcdst[io++] = mag * p.im;
Chris@366 368 }
Chris@366 369 }
Chris@366 370
Chris@366 371 #endif
Chris@366 372
Chris@366 373 }