Chris@366: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@366: Chris@366: /* Chris@366: bqvec Chris@366: Chris@366: A small library for vector arithmetic and allocation in C++ using Chris@366: raw C pointer arrays. Chris@366: Chris@372: Copyright 2007-2016 Particular Programs Ltd. Chris@366: Chris@366: Permission is hereby granted, free of charge, to any person Chris@366: obtaining a copy of this software and associated documentation Chris@366: files (the "Software"), to deal in the Software without Chris@366: restriction, including without limitation the rights to use, copy, Chris@366: modify, merge, publish, distribute, sublicense, and/or sell copies Chris@366: of the Software, and to permit persons to whom the Software is Chris@366: furnished to do so, subject to the following conditions: Chris@366: Chris@366: The above copyright notice and this permission notice shall be Chris@366: included in all copies or substantial portions of the Software. Chris@366: Chris@366: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Chris@366: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF Chris@366: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND Chris@366: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR Chris@366: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF Chris@366: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION Chris@366: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Chris@366: Chris@366: Except as contained in this notice, the names of Chris Cannam and Chris@366: Particular Programs Ltd shall not be used in advertising or Chris@366: otherwise to promote the sale, use or other dealings in this Chris@366: Software without prior written authorization. Chris@366: */ Chris@366: Chris@372: #include "VectorOpsComplex.h" Chris@366: Chris@366: #include Chris@366: Chris@366: #if defined USE_POMMIER_MATHFUN Chris@366: #if defined __ARMEL__ Chris@366: #include "pommier/neon_mathfun.h" Chris@366: #else Chris@366: #include "pommier/sse_mathfun.h" Chris@366: #endif Chris@366: #endif Chris@366: Chris@372: #if defined(_MSC_VER) || defined(WIN32) Chris@366: #include Chris@372: #ifndef alloca Chris@366: #define alloca _alloca Chris@366: #endif Chris@372: #else Chris@372: #include Chris@372: #endif Chris@372: Chris@372: #include Chris@372: Chris@372: using namespace std; Chris@366: Chris@366: namespace breakfastquay { Chris@366: Chris@366: #ifdef USE_APPROXIMATE_ATAN2 Chris@366: float approximate_atan2f(float real, float imag) Chris@366: { Chris@366: static const float pi = M_PI; Chris@366: static const float pi2 = M_PI / 2; Chris@366: Chris@366: float atan; Chris@366: Chris@366: if (real == 0.f) { Chris@366: Chris@366: if (imag > 0.0f) atan = pi2; Chris@366: else if (imag == 0.0f) atan = 0.0f; Chris@366: else atan = -pi2; Chris@366: Chris@366: } else { Chris@366: Chris@366: float z = imag/real; Chris@366: Chris@366: if (fabsf(z) < 1.f) { Chris@366: atan = z / (1.f + 0.28f * z * z); Chris@366: if (real < 0.f) { Chris@366: if (imag < 0.f) atan -= pi; Chris@366: else atan += pi; Chris@366: } Chris@366: } else { Chris@366: atan = pi2 - z / (z * z + 0.28f); Chris@366: if (imag < 0.f) atan -= pi; Chris@366: } Chris@366: } Chris@372: Chris@372: return atan; Chris@366: } Chris@366: #endif Chris@366: Chris@366: #if defined USE_POMMIER_MATHFUN Chris@366: Chris@366: #ifdef __ARMEL__ Chris@366: typedef union { Chris@366: float f[4]; Chris@366: int i[4]; Chris@366: v4sf v; Chris@366: } V4SF; Chris@366: #else Chris@366: typedef ALIGN16_BEG union { Chris@366: float f[4]; Chris@366: int i[4]; Chris@366: v4sf v; Chris@366: } ALIGN16_END V4SF; Chris@366: #endif Chris@366: Chris@366: void Chris@366: v_polar_to_cartesian_pommier(float *const BQ_R__ real, Chris@366: float *const BQ_R__ imag, Chris@366: const float *const BQ_R__ mag, Chris@366: const float *const BQ_R__ phase, Chris@366: const int count) Chris@366: { Chris@366: int idx = 0, tidx = 0; Chris@366: int i = 0; Chris@366: Chris@366: for (int i = 0; i + 4 < count; i += 4) { Chris@366: Chris@366: V4SF fmag, fphase, fre, fim; Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: fmag.f[j] = mag[idx]; Chris@366: fphase.f[j] = phase[idx++]; Chris@366: } Chris@366: Chris@366: sincos_ps(fphase.v, &fim.v, &fre.v); Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: real[tidx] = fre.f[j] * fmag.f[j]; Chris@366: imag[tidx++] = fim.f[j] * fmag.f[j]; Chris@366: } Chris@366: } Chris@366: Chris@366: while (i < count) { Chris@366: float re, im; Chris@366: c_phasor(&re, &im, phase[i]); Chris@366: real[tidx] = re * mag[i]; Chris@366: imag[tidx++] = im * mag[i]; Chris@366: ++i; Chris@366: } Chris@366: } Chris@366: Chris@366: void Chris@366: v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst, Chris@366: const int count) Chris@366: { Chris@366: int i; Chris@366: int idx = 0, tidx = 0; Chris@366: Chris@366: for (i = 0; i + 4 < count; i += 4) { Chris@366: Chris@366: V4SF fmag, fphase, fre, fim; Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: fmag.f[j] = srcdst[idx++]; Chris@366: fphase.f[j] = srcdst[idx++]; Chris@366: } Chris@366: Chris@366: sincos_ps(fphase.v, &fim.v, &fre.v); Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: srcdst[tidx++] = fre.f[j] * fmag.f[j]; Chris@366: srcdst[tidx++] = fim.f[j] * fmag.f[j]; Chris@366: } Chris@366: } Chris@366: Chris@366: while (i < count) { Chris@366: float real, imag; Chris@366: float mag = srcdst[idx++]; Chris@366: float phase = srcdst[idx++]; Chris@366: c_phasor(&real, &imag, phase); Chris@366: srcdst[tidx++] = real * mag; Chris@366: srcdst[tidx++] = imag * mag; Chris@366: ++i; Chris@366: } Chris@366: } Chris@366: Chris@366: void Chris@366: v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst, Chris@366: const float *const BQ_R__ mag, Chris@366: const float *const BQ_R__ phase, Chris@366: const int count) Chris@366: { Chris@366: int i; Chris@366: int idx = 0, tidx = 0; Chris@366: Chris@366: for (i = 0; i + 4 <= count; i += 4) { Chris@366: Chris@366: V4SF fmag, fphase, fre, fim; Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: fmag.f[j] = mag[idx]; Chris@366: fphase.f[j] = phase[idx]; Chris@366: ++idx; Chris@366: } Chris@366: Chris@366: sincos_ps(fphase.v, &fim.v, &fre.v); Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: dst[tidx++] = fre.f[j] * fmag.f[j]; Chris@366: dst[tidx++] = fim.f[j] * fmag.f[j]; Chris@366: } Chris@366: } Chris@366: Chris@366: while (i < count) { Chris@366: float real, imag; Chris@366: c_phasor(&real, &imag, phase[i]); Chris@366: dst[tidx++] = real * mag[i]; Chris@366: dst[tidx++] = imag * mag[i]; Chris@366: ++i; Chris@366: } Chris@366: } Chris@366: Chris@366: #endif Chris@366: Chris@366: #ifndef NO_COMPLEX_TYPES Chris@366: Chris@366: void Chris@366: v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, Chris@366: const bq_complex_element_t *const BQ_R__ mag, Chris@366: const bq_complex_element_t *const BQ_R__ phase, Chris@366: const int count) Chris@366: { Chris@366: if (sizeof(bq_complex_element_t) == sizeof(float)) { Chris@372: v_polar_to_cartesian_interleaved((float *)dst, Chris@372: (const float *)mag, Chris@372: (const float *)phase, Chris@372: count); Chris@366: } else { Chris@372: v_polar_to_cartesian_interleaved((double *)dst, Chris@372: (const double *)mag, Chris@372: (const double *)phase, Chris@372: count); Chris@366: } Chris@366: } Chris@366: Chris@372: void Chris@372: v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag, Chris@372: bq_complex_element_t *const BQ_R__ phase, Chris@372: const bq_complex_t *const BQ_R__ src, Chris@372: const int count) Chris@372: { Chris@372: if (sizeof(bq_complex_element_t) == sizeof(float)) { Chris@372: v_cartesian_interleaved_to_polar((float *)mag, Chris@372: (float *)phase, Chris@372: (const float *)src, Chris@372: count); Chris@372: } else { Chris@372: v_cartesian_interleaved_to_polar((double *)mag, Chris@372: (double *)phase, Chris@372: (const double *)src, Chris@372: count); Chris@372: } Chris@372: } Chris@366: Chris@366: void Chris@372: v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag, Chris@372: const bq_complex_t *const BQ_R__ src, Chris@372: const int count) Chris@366: { Chris@366: if (sizeof(bq_complex_element_t) == sizeof(float)) { Chris@372: v_cartesian_interleaved_to_magnitudes((float *)mag, Chris@372: (const float *)src, Chris@372: count); Chris@366: } else { Chris@372: v_cartesian_interleaved_to_magnitudes((double *)mag, Chris@372: (const double *)src, Chris@372: count); Chris@366: } Chris@372: } Chris@366: Chris@366: #if defined USE_POMMIER_MATHFUN Chris@366: Chris@366: //!!! further tests reqd. This is only single precision but it seems Chris@366: //!!! to be much faster than normal math library sincos. The comments Chris@366: //!!! note that precision suffers for high arguments to sincos though, Chris@366: //!!! and that is probably a common case for us Chris@366: Chris@366: void Chris@366: v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst, Chris@366: const bq_complex_element_t *const BQ_R__ src, Chris@366: const int count) Chris@366: { Chris@366: int idx = 0, tidx = 0; Chris@366: Chris@366: for (int i = 0; i < count; i += 4) { Chris@366: Chris@366: V4SF fmag, fphase, fre, fim; Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: fmag.f[j] = src[idx++]; Chris@366: fphase.f[j] = src[idx++]; Chris@366: } Chris@366: Chris@366: sincos_ps(fphase.v, &fim.v, &fre.v); Chris@366: Chris@366: for (int j = 0; j < 3; ++j) { Chris@366: dst[tidx].re = fre.f[j] * fmag.f[j]; Chris@366: dst[tidx++].im = fim.f[j] * fmag.f[j]; Chris@366: } Chris@366: } Chris@366: } Chris@366: Chris@366: #elif (defined HAVE_IPP || defined HAVE_VDSP) Chris@366: Chris@366: // with a vector library, it should be faster to deinterleave and call Chris@366: // the basic fn Chris@366: Chris@366: void Chris@366: v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst, Chris@366: const bq_complex_element_t *const BQ_R__ src, Chris@366: const int count) Chris@366: { Chris@366: bq_complex_element_t *mag = (bq_complex_element_t *) Chris@366: alloca(count * sizeof(bq_complex_element_t)); Chris@366: bq_complex_element_t *phase = (bq_complex_element_t *) Chris@366: alloca(count * sizeof(bq_complex_element_t)); Chris@366: bq_complex_element_t *magphase[] = { mag, phase }; Chris@366: Chris@366: v_deinterleave(magphase, src, 2, count); Chris@366: v_polar_to_cartesian(dst, mag, phase, count); Chris@366: } Chris@366: Chris@366: #else Chris@366: Chris@366: // without a vector library, better avoid the deinterleave step Chris@366: Chris@366: void Chris@366: v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst, Chris@366: const bq_complex_element_t *const BQ_R__ src, Chris@366: const int count) Chris@366: { Chris@366: bq_complex_element_t mag, phase; Chris@366: int idx = 0; Chris@366: for (int i = 0; i < count; ++i) { Chris@366: mag = src[idx++]; Chris@366: phase = src[idx++]; Chris@366: dst[i] = c_phasor(phase); Chris@366: dst[i].re *= mag; Chris@366: dst[i].im *= mag; Chris@366: } Chris@366: } Chris@366: Chris@366: #endif Chris@366: Chris@366: void Chris@366: v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst, Chris@366: const int count) Chris@366: { Chris@366: bq_complex_element_t mag, phase; Chris@366: int ii = 0, io = 0; Chris@366: for (int i = 0; i < count; ++i) { Chris@366: mag = srcdst[ii++]; Chris@366: phase = srcdst[ii++]; Chris@366: bq_complex_t p = c_phasor(phase); Chris@366: srcdst[io++] = mag * p.re; Chris@366: srcdst[io++] = mag * p.im; Chris@366: } Chris@366: } Chris@366: Chris@372: #endif // NO_COMPLEX_TYPES Chris@366: Chris@366: }