diff bqvec/src/VectorOpsComplex.cpp @ 366:5d0a2ebb4d17

Bring dependent libraries in to repo
author Chris Cannam
date Fri, 24 Jun 2016 14:47:45 +0100
parents
children af71cbdab621
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/src/VectorOpsComplex.cpp	Fri Jun 24 14:47:45 2016 +0100
@@ -0,0 +1,373 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    bqvec
+
+    A small library for vector arithmetic and allocation in C++ using
+    raw C pointer arrays.
+
+    Copyright 2007-2015 Particular Programs Ltd.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use, copy,
+    modify, merge, publish, distribute, sublicense, and/or sell copies
+    of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+    Except as contained in this notice, the names of Chris Cannam and
+    Particular Programs Ltd shall not be used in advertising or
+    otherwise to promote the sale, use or other dealings in this
+    Software without prior written authorization.
+*/
+
+#include "bqvec/VectorOpsComplex.h"
+
+#include <cassert>
+
+#ifdef __MSVC__
+#include <malloc.h>
+#define alloca _alloca
+#endif
+
+#if defined USE_POMMIER_MATHFUN
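+// Pommier's SIMD math functions: NEON version on little-endian ARM,
+// SSE version otherwise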
+#if defined __ARMEL__
+#include "pommier/neon_mathfun.h"
+#else
+#include "pommier/sse_mathfun.h"
+#endif
+#endif
+
+namespace breakfastquay {
+
+#ifdef USE_APPROXIMATE_ATAN2
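+// Fast approximation to atan2f(imag, real), based on the identity
+//   atan(z) ~= z / (1 + 0.28 z^2)   for |z| < 1
+// and the complementary form for |z| >= 1. The worst-case error is
+// roughly 0.005 radians, and the result lies in (-pi, pi] as with
+// atan2f.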
+float approximate_atan2f(float real, float imag)
+{
+    static const float pi = M_PI;
+    static const float pi2 = M_PI / 2;
+
+    float atan;
+
+    if (real == 0.f) {
+
+        if (imag > 0.0f) atan = pi2;
+        else if (imag == 0.0f) atan = 0.0f;
+        else atan = -pi2;
+
+    } else {
+
+        float z = imag/real;
+
+        if (fabsf(z) < 1.f) {
+            atan = z / (1.f + 0.28f * z * z);
+            if (real < 0.f) {
+                if (imag < 0.f) atan -= pi;
+                else atan += pi;
+            }
+        } else {
+            atan = pi2 - z / (z * z + 0.28f);
+            if (imag < 0.f) atan -= pi;
+        }
+    }
+
+    return atan;
+}
+#endif
+
+#if defined USE_POMMIER_MATHFUN
+
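+// V4SF gives scalar access to the four lanes of a Pommier v4sf SIMD
+// vector: the loops below fill f[0..3], convert with sincos_ps, and
+// read the results back out of f[0..3].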
+#ifdef __ARMEL__
+typedef union {
+  float f[4];
+  int i[4];
+  v4sf  v;
+} V4SF;
+#else
+typedef ALIGN16_BEG union {
+  float f[4];
+  int i[4];
+  v4sf  v;
+} ALIGN16_END V4SF;
+#endif
+
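+// The Pommier conversions below all follow the same pattern: convert
+// (mag, phase) pairs four at a time with sincos_ps, then finish any
+// remaining elements with a scalar c_phasor call.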
+void
+v_polar_to_cartesian_pommier(float *const BQ_R__ real,
+                             float *const BQ_R__ imag,
+                             const float *const BQ_R__ mag,
+                             const float *const BQ_R__ phase,
+                             const int count)
+{
+    int idx = 0, tidx = 0;
+    int i = 0;
+
+    for (i = 0; i + 4 < count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = mag[idx];
+            fphase.f[j] = phase[idx++];
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            real[tidx] = fre.f[j] * fmag.f[j];
+            imag[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    while (i < count) {
+        float re, im;
+        c_phasor(&re, &im, phase[i]);
+        real[tidx] = re * mag[i];
+        imag[tidx++] = im * mag[i];
+        ++i;
+    }
+}    
+
+void
+v_polar_interleaved_to_cartesian_inplace_pommier(float *const BQ_R__ srcdst,
+                                                 const int count)
+{
+    int i;
+    int idx = 0, tidx = 0;
+
+    for (i = 0; i + 4 < count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = srcdst[idx++];
+            fphase.f[j] = srcdst[idx++];
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            srcdst[tidx++] = fre.f[j] * fmag.f[j];
+            srcdst[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    while (i < count) {
+        float real, imag;
+        float mag = srcdst[idx++];
+        float phase = srcdst[idx++];
+        c_phasor(&real, &imag, phase);
+        srcdst[tidx++] = real * mag;
+        srcdst[tidx++] = imag * mag;
+        ++i;
+    }
+}    
+
+void
+v_polar_to_cartesian_interleaved_pommier(float *const BQ_R__ dst,
+                                         const float *const BQ_R__ mag,
+                                         const float *const BQ_R__ phase,
+                                         const int count)
+{
+    int i;
+    int idx = 0, tidx = 0;
+
+    for (i = 0; i + 4 <= count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = mag[idx];
+            fphase.f[j] = phase[idx];
+            ++idx;
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            dst[tidx++] = fre.f[j] * fmag.f[j];
+            dst[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    while (i < count) {
+        float real, imag;
+        c_phasor(&real, &imag, phase[i]);
+        dst[tidx++] = real * mag[i];
+        dst[tidx++] = imag * mag[i];
+        ++i;
+    }
+}    
+
+#endif
+
+#ifndef NO_COMPLEX_TYPES
+
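+// v_polar_to_cartesian is provided in three variants, selected at
+// compile time: IPP, vDSP, or a generic loop over c_phasor. Usage is
+// the same in each case (sketch): given mag[] and phase[] of length n,
+//
+//     v_polar_to_cartesian(out, mag, phase, n);
+//
+// leaves out[i].re == mag[i] * cos(phase[i]) and
+// out[i].im == mag[i] * sin(phase[i]).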
+#if defined HAVE_IPP
+
+void
+v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
+		     const bq_complex_element_t *const BQ_R__ mag,
+		     const bq_complex_element_t *const BQ_R__ phase,
+		     const int count)
+{
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+	ippsPolarToCart_32fc((const float *)mag, (const float *)phase,
+                             (Ipp32fc *)dst, count);
+    } else {
+	ippsPolarToCart_64fc((const double *)mag, (const double *)phase,
+                             (Ipp64fc *)dst, count);
+    }
+}
+
+#elif defined HAVE_VDSP
+
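+// Compute all the sines and cosines with a single vvsincos/vvsincosf
+// call into a stack scratch buffer, laid out as sines in sc[0..count)
+// and cosines in sc[count..2*count), then scale by the magnitudes.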
+void
+v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
+		     const bq_complex_element_t *const BQ_R__ mag,
+		     const bq_complex_element_t *const BQ_R__ phase,
+		     const int count)
+{
+    bq_complex_element_t *sc = (bq_complex_element_t *)
+	alloca(count * 2 * sizeof(bq_complex_element_t));
+
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count);
+    } else {
+        vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count);
+    }
+
+    int sini = 0;
+    int cosi = count;
+
+    for (int i = 0; i < count; ++i) {
+	dst[i].re = mag[i] * sc[cosi++];
+	dst[i].im = mag[i] * sc[sini++];
+    }
+}    
+
+#else
+
+void
+v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
+		     const bq_complex_element_t *const BQ_R__ mag,
+		     const bq_complex_element_t *const BQ_R__ phase,
+		     const int count)
+{
+    for (int i = 0; i < count; ++i) {
+	dst[i] = c_phasor(phase[i]);
+    }
+    for (int i = 0; i < count; ++i) {
+        dst[i].re *= mag[i];
+        dst[i].im *= mag[i];
+    }
+}    
+
+#endif
+
+#if defined USE_POMMIER_MATHFUN
+
+//!!! Further tests required. This is single precision only, but it
+//!!! seems to be much faster than the normal math library sincos.
+//!!! The Pommier comments note that precision suffers for large
+//!!! arguments to sincos, though, and that is probably a common case
+//!!! for us.
+
+void
+v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
+				 const bq_complex_element_t *const BQ_R__ src,
+				 const int count)
+{
+    int idx = 0, tidx = 0;
+    int i;
+
+    for (i = 0; i + 4 <= count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = src[idx++];
+            fphase.f[j] = src[idx++];
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            dst[tidx].re = fre.f[j] * fmag.f[j];
+            dst[tidx++].im = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    // Scalar tail for any elements left over when count is not a
+    // multiple of four
+    while (i < count) {
+        bq_complex_element_t mag = src[idx++];
+        bq_complex_element_t phase = src[idx++];
+        dst[tidx] = c_phasor(phase);
+        dst[tidx].re *= mag;
+        dst[tidx++].im *= mag;
+        ++i;
+    }
+}
+
+#elif (defined HAVE_IPP || defined HAVE_VDSP)
+
+// with a vector library, it should be faster to deinterleave and call
+// the basic fn
+
+void
+v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
+				 const bq_complex_element_t *const BQ_R__ src,
+				 const int count)
+{
+    bq_complex_element_t *mag = (bq_complex_element_t *)
+	alloca(count * sizeof(bq_complex_element_t));
+    bq_complex_element_t *phase = (bq_complex_element_t *)
+	alloca(count * sizeof(bq_complex_element_t));
+    bq_complex_element_t *magphase[] = { mag, phase };
+
+    v_deinterleave(magphase, src, 2, count);
+    v_polar_to_cartesian(dst, mag, phase, count);
+}
+
+#else
+
+// without a vector library, better avoid the deinterleave step
+
+void
+v_polar_interleaved_to_cartesian(bq_complex_t *const BQ_R__ dst,
+				 const bq_complex_element_t *const BQ_R__ src,
+				 const int count)
+{
+    bq_complex_element_t mag, phase;
+    int idx = 0;
+    for (int i = 0; i < count; ++i) {
+        mag = src[idx++];
+        phase = src[idx++];
+        dst[i] = c_phasor(phase);
+        dst[i].re *= mag;
+        dst[i].im *= mag;
+    }
+}
+
+#endif
+
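+// In-place variant: srcdst holds interleaved (mag, phase) pairs on
+// entry and interleaved (re, im) pairs on return.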
+void
+v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
+                                         const int count)
+{
+    // Not ideal: converts one element at a time, with no vector path
+    bq_complex_element_t mag, phase;
+    int ii = 0, io = 0;
+    for (int i = 0; i < count; ++i) {
+        mag = srcdst[ii++];
+        phase = srcdst[ii++];
+        bq_complex_t p = c_phasor(phase);
+        srcdst[io++] = mag * p.re;
+        srcdst[io++] = mag * p.im;
+    }
+}
+
+#endif
+
+}