Mercurial > hg > silvet
changeset 372:af71cbdab621 tip
Update bqvec code
author | Chris Cannam |
---|---|
date | Tue, 19 Nov 2019 10:13:32 +0000 |
parents | 426ce52ef096 |
children | |
files | bqvec/COPYING bqvec/Makefile bqvec/README.md bqvec/README.txt bqvec/bqvec/Allocators.h bqvec/bqvec/Barrier.h bqvec/bqvec/ComplexTypes.h bqvec/bqvec/Range.h bqvec/bqvec/Restrict.h bqvec/bqvec/RingBuffer.h bqvec/bqvec/VectorOps.h bqvec/bqvec/VectorOpsComplex.h bqvec/build/Makefile.inc bqvec/build/Makefile.linux bqvec/build/Makefile.linux.ipp bqvec/build/Makefile.linux.min bqvec/build/Makefile.osx bqvec/build/run-platform-tests.sh bqvec/pommier/sse_mathfun.h bqvec/src/Allocators.cpp bqvec/src/Barrier.cpp bqvec/src/VectorOpsComplex.cpp bqvec/test/TestAllocators.cpp bqvec/test/TestVectorOps.cpp bqvec/test/TestVectorOps.h bqvec/test/TestVectorOpsComplex.cpp bqvec/test/Timings.cpp |
diffstat | 27 files changed, 2981 insertions(+), 632 deletions(-) [+] |
line wrap: on
line diff
--- a/bqvec/COPYING Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/COPYING Tue Nov 19 10:13:32 2019 +0000 @@ -1,7 +1,7 @@ The following terms apply to the code in the src/ directory: - Copyright 2007-2014 Particular Programs Ltd. + Copyright 2007-2017 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
--- a/bqvec/Makefile Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/Makefile Tue Nov 19 10:13:32 2019 +0000 @@ -7,9 +7,24 @@ # -DHAVE_IPP Intel's Integrated Performance Primitives are available # -DHAVE_VDSP Apple's Accelerate framework is available # -# These are optional (they affect performance, not function) and you -# may define more than one of them. -# +# The above are optional (they affect performance, not function) and +# you may define more than one of them. +# +# The following two options trade off speed against precision for single- +# precision paths in cases where IPP and VDSP are not available: +# +# -DUSE_POMMIER_MATHFUN Use Julien Pommier's SSE/NEON implementation +# of sincos in 32-bit polar-to-cartesian conversion +# -DUSE_APPROXIMATE_ATAN2 Use a quick but *very* approximate atan2 +# function in 32-bit cartesian-to-polar conversion +# +# And a handful of miscellaneous flags: +# +# -DLACK_SINCOS Math library lacks sincos() function +# -DNO_COMPLEX_TYPES Don't bother defining bq_complex_t functions +# -DUSE_SINGLE_PRECISION_COMPLEX Use float, not double, for bq_complex_t +# -DNO_EXCEPTIONS Don't throw exceptions (abort instead) +# # Add any relevant -I flags for include paths as well. # # Note that you must supply the same flags when including bqvec @@ -17,72 +32,57 @@ # may find it simplest to just add the bqvec source files to your # application's build system and not build a bqvec library at all.) -VECTOR_DEFINES := +VECTOR_DEFINES := # Add to ALLOCATOR_DEFINES options relating to aligned malloc. +# These are not usually necessary. # # Available options are # -# -DHAVE_POSIX_MEMALIGN The posix_memalign call is available in sys/mman.h -# -DLACK_POSIX_MEMALIGN The posix_memalign call is not available +# -DHAVE_POSIX_MEMALIGN The posix_memalign call is available in sys/mman.h +# -DLACK_POSIX_MEMALIGN The posix_memalign call is not available # -# -DMALLOC_IS_ALIGNED The malloc call already returns aligned memory -# -DMALLOC_IS_NOT_ALIGNED The malloc call does not return aligned memory +# -DMALLOC_IS_ALIGNED The malloc call already returns aligned memory +# -DMALLOC_IS_NOT_ALIGNED The malloc call does not return aligned memory # -# -DUSE_OWN_ALIGNED_MALLOC No aligned malloc is available, roll your own +# -DUSE_OWN_ALIGNED_MALLOC If no aligned malloc is available, roll your own +# -DAVOID_OWN_ALIGNED_MALLOC If no aligned malloc is available, refuse to build # -# -DLACK_BAD_ALLOC The C++ library lacks the std::bad_alloc exception +# -DLACK_BAD_ALLOC The C++ library lacks the std::bad_alloc exception # # Here "aligned" is assumed to mean "aligned enough for whatever -# vector stuff the space will be used for" which most likely means +# vector stuff the space will be used for" which likely means at least # 16-byte alignment. # -# The default is to use _aligned_malloc when building with Visual C++, -# system malloc when building on OS/X, and posix_memalign otherwise. +# If no options are provided, we will use IPP functions if HAVE_IPP is +# defined, or else use _aligned_malloc when building with Visual C++ +# on Windows, roll our own when building with some other compiler on +# Windows, use system malloc when building on OS/X, and use +# posix_memalign elsewhere. # # Note that you must supply the same flags when including bqvec # headers later as you are using now when compiling the library. (You # may find it simplest to just add the bqvec source files to your # application's build system and not build a bqvec library at all.) -ALLOCATOR_DEFINES := +ALLOCATOR_DEFINES := -SRC_DIR := src -HEADER_DIR := bqvec +# Add any related includes and libraries here +# +THIRD_PARTY_INCLUDES := +THIRD_PARTY_LIBS := -SOURCES := $(wildcard $(SRC_DIR)/*.cpp) -HEADERS := $(wildcard $(HEADER_DIR)/*.h) $(wildcard $(SRC_DIR)/*.h) -OBJECTS := $(SOURCES:.cpp=.o) -OBJECTS := $(OBJECTS:.c=.o) +# If you are including a set of bq libraries into a project, you can +# override variables for all of them (including all of the above) in +# the following file, which all bq* Makefiles will include if found -CXXFLAGS := $(VECTOR_DEFINES) $(ALLOCATOR_DEFINES) -I. +-include ../Makefile.inc-bq -LIBRARY := libbqvec.a -all: $(LIBRARY) +# This project-local Makefile describes the source files and contains +# no routinely user-modifiable parts -$(LIBRARY): $(OBJECTS) - $(AR) rc $@ $^ - -clean: - rm -f $(OBJECTS) - -distclean: clean - rm -f $(LIBRARY) - -depend: - makedepend -Y -fMakefile $(SOURCES) $(HEADERS) - - -# DO NOT DELETE - -src/VectorOpsComplex.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h -src/VectorOpsComplex.o: bqvec/Restrict.h bqvec/ComplexTypes.h -src/Allocators.o: bqvec/Allocators.h bqvec/VectorOps.h bqvec/Restrict.h -bqvec/VectorOpsComplex.o: bqvec/VectorOps.h bqvec/Restrict.h -bqvec/VectorOpsComplex.o: bqvec/ComplexTypes.h -bqvec/VectorOps.o: bqvec/Restrict.h -bqvec/Allocators.o: bqvec/VectorOps.h bqvec/Restrict.h +include build/Makefile.inc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/README.md Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,36 @@ + +bqvec +===== + +A small library for vector management and arithmetic in C++ using raw +C pointer arrays, designed for simple audio buffer-shuffling. Also +includes aligned malloc wrappers and a lock-free ring buffer. + +The code can call out to vector arithmetic helpers (IPP, vDSP) in some +places, and has loops written with an eye to auto-vectorising +compilers, but mostly this is a convenience library rather than for +performance -- it initially exists to give a fairly consistent API to +useful functions over audio buffer arrays. + +This code originated as part of the Rubber Band Library written by the +same authors (see https://hg.sr.ht/~breakfastquay/rubberband/). +It has been pulled out into a separate library and relicensed under a +more permissive licence. + +Generally expected to be vendored in to local project builds rather +than being installed as a system library. + +C++ standard required: C++98 (does not use C++11 or newer features) + + * To compile on Linux: make + + * To compile on macOS: make -f build/Makefile.osx + + * To build and run tests: as above, but add the "test" target - + requires Boost test headers installed + +[](https://travis-ci.org/breakfastquay/bqvec) + +Copyright 2007-2018 Particular Programs Ltd. See the file COPYING for +(BSD/MIT-style) licence terms. +
--- a/bqvec/README.txt Sat Nov 12 09:59:34 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ - -bqvec -===== - -A small library for efficient vector arithmetic and allocation in C++ -using raw C pointer arrays. - -Copyright 2007-2014 Particular Programs Ltd. - -This code originated as part of the Rubber Band Library written by the -same authors (see https://bitbucket.org/breakfastquay/rubberband/). -It has been pulled out into a separate library and relicensed under a -more permissive licence: a BSD/MIT-style licence, as opposed to the -GPL used for Rubber Band. See the file COPYING for details. -
--- a/bqvec/bqvec/Allocators.h Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/bqvec/Allocators.h Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2015 Particular Programs Ltd. + Copyright 2007-2017 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -36,35 +36,65 @@ #ifndef BQVEC_ALLOCATORS_H #define BQVEC_ALLOCATORS_H -#include "VectorOps.h" +/* + * Aligned and per-channel allocators and deallocators for raw C array + * buffers and C++ STL vectors. + */ -#include <new> // for std::bad_alloc +#include <new> #include <stdlib.h> +#include <stddef.h> +#include <stdexcept> #ifndef HAVE_POSIX_MEMALIGN +#ifndef LACK_POSIX_MEMALIGN #ifndef _WIN32 #ifndef __APPLE__ -#ifndef LACK_POSIX_MEMALIGN #define HAVE_POSIX_MEMALIGN #endif #endif #endif #endif +#ifndef MALLOC_IS_ALIGNED #ifndef MALLOC_IS_NOT_ALIGNED -#ifdef __APPLE_ +#ifdef __APPLE__ +#define MALLOC_IS_ALIGNED +#endif +#endif +#endif + +#ifndef HAVE__ALIGNED_MALLOC +#ifndef LACK__ALIGNED_MALLOC +#ifdef _WIN32 +#define HAVE__ALIGNED_MALLOC +#endif +#endif +#endif + +#ifndef USE_OWN_ALIGNED_MALLOC +#ifndef AVOID_OWN_ALIGNED_MALLOC +#ifndef HAVE_POSIX_MEMALIGN #ifndef MALLOC_IS_ALIGNED -#define MALLOC_IS_ALIGNED +#ifndef HAVE__ALIGNED_MALLOC +#define USE_OWN_ALIGNED_MALLOC +#endif +#endif #endif #endif #endif #ifdef HAVE_POSIX_MEMALIGN #include <sys/mman.h> +#include <errno.h> #endif +#ifndef NO_EXCEPTIONS #ifdef LACK_BAD_ALLOC -namespace std { struct bad_alloc { }; } +namespace std { + struct bad_alloc { }; +} +#endif #endif namespace breakfastquay { @@ -73,9 +103,45 @@ T *allocate(size_t count) { void *ptr = 0; - // 32-byte alignment is required for at least OpenMAX + + // We'd like to check HAVE_IPP first and, if it's defined, call + // ippsMalloc_8u(count * sizeof(T)). But that isn't a general + // replacement for malloc() because it only takes an int + // argument. So we save it for the specialisations of + // allocate<float> and allocate<double> below, where we're more + // likely to get away with it. + +#ifdef MALLOC_IS_ALIGNED + ptr = malloc(count * sizeof(T)); +#else /* !MALLOC_IS_ALIGNED */ + + // That's the "sufficiently aligned" functions dealt with, the + // rest need a specific alignment provided to the call. 32-byte + // alignment is required for at least OpenMAX static const int alignment = 32; + +#ifdef HAVE__ALIGNED_MALLOC + ptr = _aligned_malloc(count * sizeof(T), alignment); +#else /* !HAVE__ALIGNED_MALLOC */ + +#ifdef HAVE_POSIX_MEMALIGN + int rv = posix_memalign(&ptr, alignment, count * sizeof(T)); + if (rv) { +#ifndef NO_EXCEPTIONS + if (rv == EINVAL) { + throw std::logic_error("Internal error: invalid alignment"); + } else { + throw std::bad_alloc(); + } +#else + abort(); +#endif + } +#else /* !HAVE_POSIX_MEMALIGN */ + #ifdef USE_OWN_ALIGNED_MALLOC +#pragma message("Rolling own aligned malloc: this is unlikely to perform as well as the alternatives") + // Alignment must be a power of two, bigger than the pointer // size. Stuff the actual malloc'd pointer in just before the // returned value. This is the least desirable way to do this -- @@ -86,34 +152,32 @@ char *adj = (char *)buf; while ((unsigned long long)adj & (alignment-1)) --adj; ptr = ((char *)adj) + alignment; + new (((void **)ptr)[-1]) (void *); ((void **)ptr)[-1] = buf; } + #else /* !USE_OWN_ALIGNED_MALLOC */ -#ifdef HAVE_POSIX_MEMALIGN - if (posix_memalign(&ptr, alignment, count * sizeof(T))) { - ptr = malloc(count * sizeof(T)); - } -#else /* !HAVE_POSIX_MEMALIGN */ -#ifdef __MSVC__ - ptr = _aligned_malloc(count * sizeof(T), alignment); -#else /* !__MSVC__ */ -#ifndef MALLOC_IS_ALIGNED -#error "No aligned malloc available: define MALLOC_IS_ALIGNED to stick with system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, or USE_OWN_ALIGNED_MALLOC to roll our own" -#endif - // Note that malloc always aligns to 16 byte boundaries on OS/X - ptr = malloc(count * sizeof(T)); - (void)alignment; // avoid compiler warning for unused -#endif /* !__MSVC__ */ + +#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, or USE_OWN_ALIGNED_MALLOC to roll our own" + +#endif /* !USE_OWN_ALIGNED_MALLOC */ #endif /* !HAVE_POSIX_MEMALIGN */ -#endif /* !USE_OWN_ALIGNED_MALLOC */ +#endif /* !HAVE__ALIGNED_MALLOC */ +#endif /* !MALLOC_IS_ALIGNED */ + if (!ptr) { #ifndef NO_EXCEPTIONS - throw(std::bad_alloc()); + throw std::bad_alloc(); #else abort(); #endif } - return (T *)ptr; + + T *typed_ptr = static_cast<T *>(ptr); + for (size_t i = 0; i < count; ++i) { + new (typed_ptr + i) T; + } + return typed_ptr; } #ifdef HAVE_IPP @@ -130,22 +194,39 @@ T *allocate_and_zero(size_t count) { T *ptr = allocate<T>(count); - v_zero(ptr, count); + for (size_t i = 0; i < count; ++i) { + ptr[i] = T(); + } return ptr; } template <typename T> void deallocate(T *ptr) { + if (!ptr) return; + +#ifdef MALLOC_IS_ALIGNED + free((void *)ptr); +#else /* !MALLOC_IS_ALIGNED */ + +#ifdef HAVE__ALIGNED_MALLOC + _aligned_free((void *)ptr); +#else /* !HAVE__ALIGNED_MALLOC */ + +#ifdef HAVE_POSIX_MEMALIGN + free((void *)ptr); +#else /* !HAVE_POSIX_MEMALIGN */ + #ifdef USE_OWN_ALIGNED_MALLOC - if (ptr) free(((void **)ptr)[-1]); + free(((void **)ptr)[-1]); #else /* !USE_OWN_ALIGNED_MALLOC */ -#ifdef __MSVC__ - if (ptr) _aligned_free((void *)ptr); -#else /* !__MSVC__ */ - if (ptr) free((void *)ptr); -#endif /* !__MSVC__ */ + +#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, or USE_OWN_ALIGNED_MALLOC to roll our own" + #endif /* !USE_OWN_ALIGNED_MALLOC */ +#endif /* !HAVE_POSIX_MEMALIGN */ +#endif /* !HAVE__ALIGNED_MALLOC */ +#endif /* !MALLOC_IS_ALIGNED */ } #ifdef HAVE_IPP @@ -164,7 +245,11 @@ { T *newptr = allocate<T>(count); if (oldcount && ptr) { - v_copy(newptr, ptr, oldcount < count ? oldcount : count); + size_t tocopy = oldcount; + if (count < oldcount) tocopy = count; + for (size_t i = 0; i < tocopy; ++i) { + newptr[i] = ptr[i]; + } } if (ptr) deallocate<T>(ptr); return newptr; @@ -175,7 +260,9 @@ T *reallocate_and_zero(T *ptr, size_t oldcount, size_t count) { ptr = reallocate(ptr, oldcount, count); - v_zero(ptr, count); + for (size_t i = 0; i < count; ++i) { + ptr[i] = T(); + } return ptr; } @@ -184,7 +271,11 @@ T *reallocate_and_zero_extension(T *ptr, size_t oldcount, size_t count) { ptr = reallocate(ptr, oldcount, count); - if (count > oldcount) v_zero(ptr + oldcount, count - oldcount); + if (count > oldcount) { + for (size_t i = oldcount; i < count; ++i) { + ptr[i] = T(); + } + } return ptr; } @@ -229,7 +320,11 @@ { T **newptr = allocate_channels<T>(channels, count); if (oldcount && ptr) { - v_copy_channels(newptr, ptr, channels, oldcount < count ? oldcount : count); + for (size_t c = 0; c < channels; ++c) { + for (size_t i = 0; i < oldcount && i < count; ++i) { + newptr[c][i] = ptr[c][i]; + } + } } if (ptr) deallocate_channels<T>(ptr, oldchannels); return newptr; @@ -242,23 +337,121 @@ { T **newptr = allocate_and_zero_channels<T>(channels, count); if (oldcount && ptr) { - v_copy_channels(newptr, ptr, channels, oldcount < count ? oldcount : count); + for (size_t c = 0; c < channels; ++c) { + for (size_t i = 0; i < oldcount && i < count; ++i) { + newptr[c][i] = ptr[c][i]; + } + } } if (ptr) deallocate_channels<T>(ptr, oldchannels); return newptr; } -/// RAII class to call deallocate() on destruction + +/** Trivial RAII class to call deallocate() on destruction. + */ template <typename T> class Deallocator { public: Deallocator(T *t) : m_t(t) { } ~Deallocator() { deallocate<T>(m_t); } + private: T *m_t; }; + +/** Allocator for use with STL classes, e.g. vector, to ensure + * alignment. Based on example code by Stephan T. Lavavej. + * + * e.g. std::vector<float, StlAllocator<float> > v; + */ +template <typename T> +class StlAllocator +{ +public: + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + StlAllocator() { } + StlAllocator(const StlAllocator&) { } + template <typename U> StlAllocator(const StlAllocator<U>&) { } + ~StlAllocator() { } + + T * + allocate(const size_t n) const { + if (n == 0) return 0; + if (n > max_size()) { +#ifndef NO_EXCEPTIONS + throw std::length_error("Size overflow in StlAllocator::allocate()"); +#else + abort(); +#endif + } + return ::breakfastquay::allocate<T>(n); + } + + void + deallocate(T *const p, const size_t) const { + ::breakfastquay::deallocate(p); + } + + template <typename U> + T * + allocate(const size_t n, const U *) const { + return allocate(n); + } + + T * + address(T &r) const { + return &r; + } + + const T * + address(const T &s) const { + return &s; + } + + size_t + max_size() const { + return (static_cast<size_t>(0) - static_cast<size_t>(1)) / sizeof(T); + } + + template <typename U> struct rebind { + typedef StlAllocator<U> other; + }; + + bool + operator==(const StlAllocator &) const { + return true; + } + + bool + operator!=(const StlAllocator &) const { + return false; + } + + void + construct(T *const p, const T &t) const { + void *const pv = static_cast<void *>(p); + new (pv) T(t); + } + + void + destroy(T *const p) const { + p->~T(); + } + +private: + StlAllocator& operator=(const StlAllocator&); +}; + } #endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/bqvec/Barrier.h Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,45 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + bqvec + + A small library for vector arithmetic and allocation in C++ using + raw C pointer arrays. + + Copyright 2007-2017 Particular Programs Ltd. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the names of Chris Cannam and + Particular Programs Ltd shall not be used in advertising or + otherwise to promote the sale, use or other dealings in this + Software without prior written authorization. +*/ + +#ifndef BQVEC_BARRIER_H +#define BQVEC_BARRIER_H + +namespace breakfastquay { + extern void system_memorybarrier(); +} + +#define BQ_MBARRIER() ::breakfastquay::system_memorybarrier() + +#endif
--- a/bqvec/bqvec/ComplexTypes.h Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/bqvec/ComplexTypes.h Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2015 Particular Programs Ltd. + Copyright 2007-2017 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -39,12 +39,19 @@ namespace breakfastquay { #ifndef NO_COMPLEX_TYPES + +#ifdef USE_SINGLE_PRECISION_COMPLEX +typedef float bq_complex_element_t; +#else +typedef double bq_complex_element_t; +#endif + // Convertible with other complex types that store re+im consecutively -typedef double bq_complex_element_t; struct bq_complex_t { bq_complex_element_t re; bq_complex_element_t im; }; + #endif }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/bqvec/Range.h Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,57 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + bqvec + + A small library for vector arithmetic and allocation in C++ using + raw C pointer arrays. + + Copyright 2007-2017 Particular Programs Ltd. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the names of Chris Cannam and + Particular Programs Ltd shall not be used in advertising or + otherwise to promote the sale, use or other dealings in this + Software without prior written authorization. +*/ + +#ifndef BQVEC_RANGE_H +#define BQVEC_RANGE_H + +namespace breakfastquay { + +/** Check whether an integer index is in range for a container, + avoiding overflows and signed/unsigned comparison warnings. +*/ +template<typename T, typename C> +bool in_range_for(const C &container, T i) +{ + if (i < 0) return false; + if (sizeof(T) > sizeof(typename C::size_type)) { + return i < static_cast<T>(container.size()); + } else { + return static_cast<typename C::size_type>(i) < container.size(); + } +} + +} + +#endif
--- a/bqvec/bqvec/Restrict.h Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/bqvec/Restrict.h Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2015 Particular Programs Ltd. + Copyright 2007-2017 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -36,13 +36,21 @@ #ifndef BQVEC_RESTRICT_H #define BQVEC_RESTRICT_H +#ifdef _MSC_VER +#define BQ_R__ __restrict +#else #ifdef __MSVC__ #define BQ_R__ __restrict #endif +#endif +#ifdef __clang__ +#define BQ_R__ __restrict__ +#else #ifdef __GNUC__ #define BQ_R__ __restrict__ #endif +#endif #ifndef BQ_R__ #define BQ_R__
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/bqvec/RingBuffer.h Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,518 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + bqvec + + A small library for vector arithmetic and allocation in C++ using + raw C pointer arrays. + + Copyright 2007-2017 Particular Programs Ltd. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the names of Chris Cannam and + Particular Programs Ltd shall not be used in advertising or + otherwise to promote the sale, use or other dealings in this + Software without prior written authorization. +*/ + +#ifndef BQVEC_RINGBUFFER_H +#define BQVEC_RINGBUFFER_H + +//#define DEBUG_RINGBUFFER 1 + +#include "Barrier.h" +#include "Allocators.h" +#include "Restrict.h" +#include "VectorOps.h" + +#include <sys/types.h> + +#include <iostream> + +namespace breakfastquay { + +/** + * RingBuffer implements a lock-free ring buffer for one writer and + * one reader, that is to be used to store a sample type T. + * + * RingBuffer is thread-safe provided only one thread writes and only + * one thread reads. + */ +template <typename T> +class RingBuffer +{ +public: + /** + * Create a ring buffer with room to write n samples. + * + * Note that the internal storage size will actually be n+1 + * samples, as one element is unavailable for administrative + * reasons. Since the ring buffer performs best if its size is a + * power of two, this means n should ideally be some power of two + * minus one. + */ + RingBuffer(int n); + + virtual ~RingBuffer(); + + /** + * Return the total capacity of the ring buffer in samples. + * (This is the argument n passed to the constructor.) + */ + int getSize() const; + + /** + * Return a new ring buffer (allocated with "new" -- caller must + * delete when no longer needed) of the given size, containing the + * same data as this one. If another thread reads from or writes + * to this buffer during the call, the results may be incomplete + * or inconsistent. If this buffer's data will not fit in the new + * size, the contents are undefined. + */ + RingBuffer<T> *resized(int newSize) const; + + /** + * Reset read and write pointers, thus emptying the buffer. + * Should be called from the write thread. + */ + void reset(); + + /** + * Return the amount of data available for reading, in samples. + */ + int getReadSpace() const; + + /** + * Return the amount of space available for writing, in samples. + */ + int getWriteSpace() const; + + /** + * Read n samples from the buffer. If fewer than n are available, + * the remainder will be zeroed out. Returns the number of + * samples actually read. + * + * This is a template function, taking an argument S for the target + * sample type, which is permitted to differ from T if the two + * types are compatible for arithmetic operations. + */ + template <typename S> + int read(S *const BQ_R__ destination, int n); + + /** + * Read n samples from the buffer, adding them to the destination. + * If fewer than n are available, the remainder will be left + * alone. Returns the number of samples actually read. + * + * This is a template function, taking an argument S for the target + * sample type, which is permitted to differ from T if the two + * types are compatible for arithmetic operations. + */ + template <typename S> + int readAdding(S *const BQ_R__ destination, int n); + + /** + * Read one sample from the buffer. If no sample is available, + * this will silently return zero. Calling this repeatedly is + * obviously slower than calling read once, but it may be good + * enough if you don't want to allocate a buffer to read into. + */ + T readOne(); + + /** + * Read n samples from the buffer, if available, without advancing + * the read pointer -- i.e. a subsequent read() or skip() will be + * necessary to empty the buffer. If fewer than n are available, + * the remainder will be zeroed out. Returns the number of + * samples actually read. + */ + int peek(T *const BQ_R__ destination, int n) const; + + /** + * Read one sample from the buffer, if available, without + * advancing the read pointer -- i.e. a subsequent read() or + * skip() will be necessary to empty the buffer. Returns zero if + * no sample was available. + */ + T peekOne() const; + + /** + * Pretend to read n samples from the buffer, without actually + * returning them (i.e. discard the next n samples). Returns the + * number of samples actually available for discarding. + */ + int skip(int n); + + /** + * Write n samples to the buffer. If insufficient space is + * available, not all samples may actually be written. Returns + * the number of samples actually written. + * + * This is a template function, taking an argument S for the source + * sample type, which is permitted to differ from T if the two + * types are compatible for assignment. + */ + template <typename S> + int write(const S *const BQ_R__ source, int n); + + /** + * Write n zero-value samples to the buffer. If insufficient + * space is available, not all zeros may actually be written. + * Returns the number of zeroes actually written. + */ + int zero(int n); + +protected: + T *const BQ_R__ m_buffer; + int m_writer; + int m_reader; + const int m_size; + + int readSpaceFor(int w, int r) const { + int space; + if (w > r) space = w - r; + else if (w < r) space = (w + m_size) - r; + else space = 0; + return space; + } + + int writeSpaceFor(int w, int r) const { + int space = (r + m_size - w - 1); + if (space >= m_size) space -= m_size; + return space; + } + +private: + RingBuffer(const RingBuffer &); // not provided + RingBuffer &operator=(const RingBuffer &); // not provided +}; + +template <typename T> +RingBuffer<T>::RingBuffer(int n) : + m_buffer(allocate<T>(n + 1)), + m_writer(0), + m_size(n + 1) +{ +#ifdef DEBUG_RINGBUFFER + std::cerr << "RingBuffer<T>[" << this << "]::RingBuffer(" << n << ")" << std::endl; +#endif + + m_reader = 0; +} + +template <typename T> +RingBuffer<T>::~RingBuffer() +{ +#ifdef DEBUG_RINGBUFFER + std::cerr << "RingBuffer<T>[" << this << "]::~RingBuffer" << std::endl; +#endif + + deallocate(m_buffer); +} + +template <typename T> +int +RingBuffer<T>::getSize() const +{ +#ifdef DEBUG_RINGBUFFER + std::cerr << "RingBuffer<T>[" << this << "]::getSize(): " << m_size-1 << std::endl; +#endif + + return m_size - 1; +} + +template <typename T> +RingBuffer<T> * +RingBuffer<T>::resized(int newSize) const +{ + RingBuffer<T> *newBuffer = new RingBuffer<T>(newSize); + + int w = m_writer; + int r = m_reader; + + while (r != w) { + T value = m_buffer[r]; + newBuffer->write(&value, 1); + if (++r == m_size) r = 0; + } + + return newBuffer; +} + +template <typename T> +void +RingBuffer<T>::reset() +{ +#ifdef DEBUG_RINGBUFFER + std::cerr << "RingBuffer<T>[" << this << "]::reset" << std::endl; +#endif + + m_reader = m_writer; +} + +template <typename T> +int +RingBuffer<T>::getReadSpace() const +{ + return readSpaceFor(m_writer, m_reader); +} + +template <typename T> +int +RingBuffer<T>::getWriteSpace() const +{ + return writeSpaceFor(m_writer, m_reader); +} + +template <typename T> +template <typename S> +int +RingBuffer<T>::read(S *const BQ_R__ destination, int n) +{ + int w = m_writer; + int r = m_reader; + + int available = readSpaceFor(w, r); + if (n > available) { + std::cerr << "WARNING: RingBuffer::read: " << n << " requested, only " + << available << " available" << std::endl; + n = available; + } + if (n == 0) return n; + + int here = m_size - r; + T *const BQ_R__ bufbase = m_buffer + r; + + if (here >= n) { + v_convert(destination, bufbase, n); + } else { + v_convert(destination, bufbase, here); + v_convert(destination + here, m_buffer, n - here); + } + + r += n; + while (r >= m_size) r -= m_size; + + BQ_MBARRIER(); + m_reader = r; + + return n; +} + +template <typename T> +template <typename S> +int +RingBuffer<T>::readAdding(S *const BQ_R__ destination, int n) +{ + int w = m_writer; + int r = m_reader; + + int available = readSpaceFor(w, r); + if (n > available) { + std::cerr << "WARNING: RingBuffer::read: " << n << " requested, only " + << available << " available" << std::endl; + n = available; + } + if (n == 0) return n; + + int here = m_size - r; + T *const BQ_R__ bufbase = m_buffer + r; + + if (here >= n) { + v_add(destination, bufbase, n); + } else { + v_add(destination, bufbase, here); + v_add(destination + here, m_buffer, n - here); + } + + r += n; + while (r >= m_size) r -= m_size; + + BQ_MBARRIER(); + m_reader = r; + + return n; +} + +template <typename T> +T +RingBuffer<T>::readOne() +{ + int w = m_writer; + int r = m_reader; + + if (w == r) { + std::cerr << "WARNING: RingBuffer::readOne: no sample available" + << std::endl; + return T(); + } + + T value = m_buffer[r]; + if (++r == m_size) r = 0; + + BQ_MBARRIER(); + m_reader = r; + + return value; +} + +template <typename T> +int +RingBuffer<T>::peek(T *const BQ_R__ destination, int n) const +{ + int w = m_writer; + int r = m_reader; + + int available = readSpaceFor(w, r); + if (n > available) { + std::cerr << "WARNING: RingBuffer::peek: " << n << " requested, only " + << available << " available" << std::endl; + memset(destination + available, 0, (n - available) * sizeof(T)); + n = available; + } + if (n == 0) return n; + + int here = m_size - r; + const T *const BQ_R__ bufbase = m_buffer + r; + + if (here >= n) { + v_copy(destination, bufbase, n); + } else { + v_copy(destination, bufbase, here); + v_copy(destination + here, m_buffer, n - here); + } + + return n; +} + +template <typename T> +T +RingBuffer<T>::peekOne() const +{ + int w = m_writer; + int r = m_reader; + + if (w == r) { + std::cerr << "WARNING: RingBuffer::peekOne: no sample available" + << std::endl; + return 0; + } + + T value = m_buffer[r]; + return value; +} + +template <typename T> +int +RingBuffer<T>::skip(int n) +{ + int w = m_writer; + int r = m_reader; + + int available = readSpaceFor(w, r); + if (n > available) { + std::cerr << "WARNING: RingBuffer::skip: " << n << " requested, only " + << available << " available" << std::endl; + n = available; + } + if (n == 0) return n; + + r += n; + while (r >= m_size) r -= m_size; + + // No memory barrier required, because we didn't read any data + m_reader = r; + + return n; +} + +template <typename T> +template <typename S> +int +RingBuffer<T>::write(const S *const BQ_R__ source, int n) +{ + int w = m_writer; + int r = m_reader; + + int available = writeSpaceFor(w, r); + if (n > available) { + std::cerr << "WARNING: RingBuffer::write: " << n + << " requested, only room for " << available << std::endl; + n = available; + } + if (n == 0) return n; + + int here = m_size - w; + T *const BQ_R__ bufbase = m_buffer + w; + + if (here >= n) { + v_convert<S, T>(bufbase, source, n); + } else { + v_convert<S, T>(bufbase, source, here); + v_convert<S, T>(m_buffer, source + here, n - here); + } + + w += n; + while (w >= m_size) w -= m_size; + + BQ_MBARRIER(); + m_writer = w; + + return n; +} + +template <typename T> +int +RingBuffer<T>::zero(int n) +{ + int w = m_writer; + int r = m_reader; + + int available = writeSpaceFor(w, r); + if (n > available) { + std::cerr << "WARNING: RingBuffer::zero: " << n + << " requested, only room for " << available << std::endl; + n = available; + } + if (n == 0) return n; + + int here = m_size - w; + T *const BQ_R__ bufbase = m_buffer + w; + + if (here >= n) { + v_zero(bufbase, n); + } else { + v_zero(bufbase, here); + v_zero(m_buffer, n - here); + } + + w += n; + while (w >= m_size) w -= m_size; + + BQ_MBARRIER(); + m_writer = w; + + return n; +} + +} + +#endif // BQVEC_RINGBUFFER_H
--- a/bqvec/bqvec/VectorOps.h Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/bqvec/VectorOps.h Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2015 Particular Programs Ltd. + Copyright 2007-2016 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -40,15 +40,20 @@ #ifndef _MSC_VER #include <inttypes.h> #endif +#include <ippversion.h> #include <ipps.h> +#if (IPP_VERSION_MAJOR <= 7) +// Deprecated in v8, removed in v9 #include <ippac.h> #endif +#endif #ifdef HAVE_VDSP #include <Accelerate/Accelerate.h> #endif #include <cstring> +#include <cmath> #include "Restrict.h" @@ -59,9 +64,17 @@ * * Write basic vector-manipulation loops in such a way as to promote * the likelihood that a good current C++ compiler can auto-vectorize - * them (e.g. gcc-4.x with -ftree-vectorize). Provide calls out to - * supported vector libraries (e.g. IPP, Accelerate) where useful. - * No intrinsics or assembly. + * them (e.g. gcc-4+ with -ftree-vectorize/-O3). Provide calls out to + * supported vector libraries (e.g. IPP, Accelerate) where useful. No + * intrinsics or assembly. + * + * Size and index arguments are plain machine ints, to facilitate + * compiler optimization and vectorization. In general these functions + * should be used only with buffers whose sizes are calculated from + * known processing parameters and that are known to be much smaller + * than 32-bit int range. For security reasons you should not use + * these functions with buffers whose sizes may be under control of + * the user or external input. * * Argument ordering: * @@ -71,11 +84,13 @@ * if present (i.e. argument ordering follows memcpy). * * The final argument is always a count of the number of elements in - * each vector. + * each vector. * * Some functions operate on a set of vectors at once: their names all * contain the text _channels, and the number of channels (i.e. number - * of vectors) is the argument before last. + * of vectors) is the argument before last. If the number of input and + * output channels may differ, the output channel count immediately + * follows the output channel pointer. * * Any additional arguments, e.g. scale factors, appear between the * vector pointers at the start and the counts at the end. @@ -127,7 +142,7 @@ { vDSP_vclrD(vec, 1, count); } -#endif +#endif // HAVE_IPP /** * v_zero_channels @@ -194,7 +209,7 @@ { ippsCopy_64f(src, dst, count); } -#endif +#endif // HAVE_IPP /** * v_copy_channels @@ -247,7 +262,30 @@ { ippsMove_64f(src, dst, count); } -#endif +#endif // HAVE_IPP + +/** + * v_move_channels + * + * Copy the contents of the individual vectors in the set \arg src to + * the corresponding vectors in the set \arg dst. All vectors have + * length \arg count and there are \arg channels vectors in each set. + * + * The source vectors may overlap with the target vectors of + * corresponding channels. That is, src[i] may overlap with dst[i] but + * not with any other source or target vector. (If you know that no + * vectors can overlap, use v_copy instead.) + */ +template<typename T> +inline void v_move_channels(T *const *const dst, // not BQ_R__ (aliased) + const T *const *const src, // not BQ_R__ (aliased) + const int channels, + const int count) +{ + for (int c = 0; c < channels; ++c) { + v_move(dst[c], src[c], count); + } +} /** * v_convert @@ -314,7 +352,7 @@ { vDSP_vdpsp((double *)src, 1, dst, 1, count); } -#endif +#endif // HAVE_VDSP /** * v_convert_channels @@ -344,71 +382,55 @@ * v_add * * Add the elements in the vector \arg src to the corresponding - * elements in the vector \arg dst, both of length arg \count, leaving - * the result in \arg dst. + * elements in the vector \arg srcdst, both of length arg \count, leaving + * the result in \arg srcdst. * - * Caller guarantees that \arg src and \arg dst are non-overlapping. + * Caller guarantees that \arg src and \arg srcdst are non-overlapping. */ template<typename T> -inline void v_add(T *const BQ_R__ dst, +inline void v_add(T *const BQ_R__ srcdst, const T *const BQ_R__ src, const int count) { for (int i = 0; i < count; ++i) { - dst[i] += src[i]; - } -} - -/** - * v_add - * - * Add the constant \arg value to every element of the vector \arg - * dst, of length arg \count, leaving the result in \arg dst. - */ -template<typename T> -inline void v_add(T *const BQ_R__ dst, - const T value, - const int count) -{ - for (int i = 0; i < count; ++i) { - dst[i] += value; + srcdst[i] += src[i]; } } #if defined HAVE_IPP template<> -inline void v_add(float *const BQ_R__ dst, +inline void v_add(float *const BQ_R__ srcdst, const float *const BQ_R__ src, const int count) { - ippsAdd_32f_I(src, dst, count); + ippsAdd_32f_I(src, srcdst, count); } -inline void v_add(double *const BQ_R__ dst, +inline void v_add(double *const BQ_R__ srcdst, const double *const BQ_R__ src, const int count) { - ippsAdd_64f_I(src, dst, count); + ippsAdd_64f_I(src, srcdst, count); } -#endif +#endif // HAVE_IPP /** * v_add_channels * * Add the elements in the individual vectors in the set \arg src to * the corresponding elements of the corresponding vectors in \arg - * dst, leaving the results in \arg dst. All vectors have length \arg + * srcdst, leaving the results in \arg srcdst. All vectors have length \arg * count, and there are \arg channels vectors in each set. * - * Caller guarantees that all of the \arg src and \arg dst vectors are + * Caller guarantees that all of the \arg src and \arg srcdst vectors are * non-overlapping with each other. */ template<typename T> -inline void v_add_channels(T *const BQ_R__ *const BQ_R__ dst, +inline void v_add_channels(T *const BQ_R__ *const BQ_R__ srcdst, const T *const BQ_R__ *const BQ_R__ src, const int channels, const int count) { for (int c = 0; c < channels; ++c) { - v_add(dst[c], src[c], count); + v_add(srcdst[c], src[c], count); } } @@ -417,19 +439,19 @@ * * Add the elements in the vector \arg src, each multiplied by the * constant factor \arg gain, to the corresponding elements in the - * vector \arg dst, both of length arg \count, leaving the result in - * \arg dst. + * vector \arg srcdst, both of length arg \count, leaving the result in + * \arg srcdst. * - * Caller guarantees that \arg src and \arg dst are non-overlapping. + * Caller guarantees that \arg src and \arg srcdst are non-overlapping. */ template<typename T, typename G> -inline void v_add_with_gain(T *const BQ_R__ dst, +inline void v_add_with_gain(T *const BQ_R__ srcdst, const T *const BQ_R__ src, const G gain, const int count) { for (int i = 0; i < count; ++i) { - dst[i] += src[i] * gain; + srcdst[i] += src[i] * gain; } } @@ -438,22 +460,22 @@ * * Add the elements in the individual vectors in the set \arg src, * each multiplied by the constant factor \arg gain, to the - * corresponding elements of the corresponding vectors in \arg dst, - * leaving the results in \arg dst. All vectors have length \arg + * corresponding elements of the corresponding vectors in \arg srcdst, + * leaving the results in \arg srcdst. All vectors have length \arg * count, and there are \arg channels vectors in each set. * - * Caller guarantees that all of the \arg src and \arg dst vectors are + * Caller guarantees that all of the \arg src and \arg srcdst vectors are * non-overlapping with each other. */ template<typename T, typename G> -inline void v_add_channels_with_gain(T *const BQ_R__ *const BQ_R__ dst, +inline void v_add_channels_with_gain(T *const BQ_R__ *const BQ_R__ srcdst, const T *const BQ_R__ *const BQ_R__ src, const G gain, const int channels, const int count) { for (int c = 0; c < channels; ++c) { - v_add_with_gain(dst[c], src[c], gain, count); + v_add_with_gain(srcdst[c], src[c], gain, count); } } @@ -461,105 +483,121 @@ * v_subtract * * Subtract the elements in the vector \arg src from the corresponding - * elements in the vector \arg dst, both of length arg \count, leaving - * the result in \arg dst. + * elements in the vector \arg srcdst, both of length arg \count, leaving + * the result in \arg srcdst. * - * Caller guarantees that \arg src and \arg dst are non-overlapping. + * Caller guarantees that \arg src and \arg srcdst are non-overlapping. */ template<typename T> -inline void v_subtract(T *const BQ_R__ dst, +inline void v_subtract(T *const BQ_R__ srcdst, const T *const BQ_R__ src, const int count) { for (int i = 0; i < count; ++i) { - dst[i] -= src[i]; + srcdst[i] -= src[i]; } } #if defined HAVE_IPP template<> -inline void v_subtract(float *const BQ_R__ dst, +inline void v_subtract(float *const BQ_R__ srcdst, const float *const BQ_R__ src, const int count) { - ippsSub_32f_I(src, dst, count); + ippsSub_32f_I(src, srcdst, count); } -inline void v_subtract(double *const BQ_R__ dst, +inline void v_subtract(double *const BQ_R__ srcdst, const double *const BQ_R__ src, const int count) { - ippsSub_64f_I(src, dst, count); + ippsSub_64f_I(src, srcdst, count); } -#endif +#endif // HAVE_IPP /** * v_scale * - * Scale the elements in the vector \arg dst, of length \arg count, by + * Scale the elements in the vector \arg srcdst, of length \arg count, by * the factor \arg gain. */ template<typename T, typename G> -inline void v_scale(T *const BQ_R__ dst, +inline void v_scale(T *const BQ_R__ srcdst, const G gain, const int count) { for (int i = 0; i < count; ++i) { - dst[i] *= gain; + srcdst[i] *= gain; } } #if defined HAVE_IPP template<> -inline void v_scale(float *const BQ_R__ dst, +inline void v_scale(float *const BQ_R__ srcdst, const float gain, const int count) { - ippsMulC_32f_I(gain, dst, count); + ippsMulC_32f_I(gain, srcdst, count); } template<> -inline void v_scale(double *const BQ_R__ dst, +inline void v_scale(double *const BQ_R__ srcdst, const double gain, const int count) { - ippsMulC_64f_I(gain, dst, count); + ippsMulC_64f_I(gain, srcdst, count); } -#endif +#endif // HAVE_IPP + +/** + * v_increment + * + * Add a constant quantity \incr to all of the elements in the vector + * \arg srcdst, of length \arg count. + */ +template<typename T, typename G> +inline void v_increment(T *const BQ_R__ srcdst, + const G incr, + const int count) +{ + for (int i = 0; i < count; ++i) { + srcdst[i] += T(incr); + } +} /** * v_multiply * - * Multiply the elements in the vector \arg dst by the corresponding + * Multiply the elements in the vector \arg srcdst by the corresponding * elements in the vector \arg src, both of length arg \count, leaving - * the result in \arg dst. + * the result in \arg srcdst. * - * Caller guarantees that \arg src and \arg dst are non-overlapping. + * Caller guarantees that \arg src and \arg srcdst are non-overlapping. */ template<typename T> -inline void v_multiply(T *const BQ_R__ dst, +inline void v_multiply(T *const BQ_R__ srcdst, const T *const BQ_R__ src, const int count) { for (int i = 0; i < count; ++i) { - dst[i] *= src[i]; + srcdst[i] *= src[i]; } } #if defined HAVE_IPP template<> -inline void v_multiply(float *const BQ_R__ dst, +inline void v_multiply(float *const BQ_R__ srcdst, const float *const BQ_R__ src, const int count) { - ippsMul_32f_I(src, dst, count); + ippsMul_32f_I(src, srcdst, count); } template<> -inline void v_multiply(double *const BQ_R__ dst, +inline void v_multiply(double *const BQ_R__ srcdst, const double *const BQ_R__ src, const int count) { - ippsMul_64f_I(src, dst, count); + ippsMul_64f_I(src, srcdst, count); } -#endif +#endif // HAVE_IPP /** * v_multiply @@ -572,10 +610,10 @@ * non-overlapping. */ template<typename T> -inline void v_multiply(T *const BQ_R__ dst, - const T *const BQ_R__ src1, - const T *const BQ_R__ src2, - const int count) +inline void v_multiply_to(T *const BQ_R__ dst, + const T *const BQ_R__ src1, + const T *const BQ_R__ src2, + const int count) { for (int i = 0; i < count; ++i) { dst[i] = src1[i] * src2[i]; @@ -584,98 +622,98 @@ #if defined HAVE_IPP template<> -inline void v_multiply(float *const BQ_R__ dst, - const float *const BQ_R__ src1, - const float *const BQ_R__ src2, - const int count) +inline void v_multiply_to(float *const BQ_R__ dst, + const float *const BQ_R__ src1, + const float *const BQ_R__ src2, + const int count) { ippsMul_32f(src1, src2, dst, count); } template<> -inline void v_multiply(double *const BQ_R__ dst, - const double *const BQ_R__ src1, - const double *const BQ_R__ src2, - const int count) +inline void v_multiply_to(double *const BQ_R__ dst, + const double *const BQ_R__ src1, + const double *const BQ_R__ src2, + const int count) { ippsMul_64f(src1, src2, dst, count); } -#endif +#endif // HAVE_IPP /** * v_divide * - * Divide the elements in the vector \arg dst by the corresponding + * Divide the elements in the vector \arg srcdst by the corresponding * elements in the vector \arg src, both of length arg \count, leaving - * the result in \arg dst. + * the result in \arg srcdst. * - * Caller guarantees that \arg src and \arg dst are non-overlapping. + * Caller guarantees that \arg src and \arg srcdst are non-overlapping. */ template<typename T> -inline void v_divide(T *const BQ_R__ dst, +inline void v_divide(T *const BQ_R__ srcdst, const T *const BQ_R__ src, const int count) { for (int i = 0; i < count; ++i) { - dst[i] /= src[i]; + srcdst[i] /= src[i]; } } #if defined HAVE_IPP template<> -inline void v_divide(float *const BQ_R__ dst, +inline void v_divide(float *const BQ_R__ srcdst, const float *const BQ_R__ src, const int count) { - ippsDiv_32f_I(src, dst, count); + ippsDiv_32f_I(src, srcdst, count); } template<> -inline void v_divide(double *const BQ_R__ dst, +inline void v_divide(double *const BQ_R__ srcdst, const double *const BQ_R__ src, const int count) { - ippsDiv_64f_I(src, dst, count); + ippsDiv_64f_I(src, srcdst, count); } -#endif +#endif // HAVE_IPP /** * v_multiply_and_add * * Multiply the corresponding elements of the vectors \arg src1 and * \arg src2, both of length arg \count, and add the results to the - * corresponding elements of vector \arg dst. + * corresponding elements of vector \arg srcdst. * - * Caller guarantees that \arg src1, \arg src2 and \arg dst are + * Caller guarantees that \arg src1, \arg src2 and \arg srcdst are * non-overlapping. */ template<typename T> -inline void v_multiply_and_add(T *const BQ_R__ dst, +inline void v_multiply_and_add(T *const BQ_R__ srcdst, const T *const BQ_R__ src1, const T *const BQ_R__ src2, const int count) { for (int i = 0; i < count; ++i) { - dst[i] += src1[i] * src2[i]; + srcdst[i] += src1[i] * src2[i]; } } #if defined HAVE_IPP template<> -inline void v_multiply_and_add(float *const BQ_R__ dst, +inline void v_multiply_and_add(float *const BQ_R__ srcdst, const float *const BQ_R__ src1, const float *const BQ_R__ src2, const int count) { - ippsAddProduct_32f(src1, src2, dst, count); + ippsAddProduct_32f(src1, src2, srcdst, count); } template<> -inline void v_multiply_and_add(double *const BQ_R__ dst, +inline void v_multiply_and_add(double *const BQ_R__ srcdst, const double *const BQ_R__ src1, const double *const BQ_R__ src2, const int count) { - ippsAddProduct_64f(src1, src2, dst, count); + ippsAddProduct_64f(src1, src2, srcdst, count); } -#endif +#endif // HAVE_IPP /** * v_sum @@ -695,32 +733,53 @@ } /** + * v_multiply_and_sum + * + * Multiply the corresponding elements of the vectors \arg src1 and + * \arg src2, both of length arg \count, sum the results, and return + * the sum as a scalar value. + * + * Caller guarantees that \arg src1 and \arg src2 are non-overlapping. + */ +template<typename T> +inline T v_multiply_and_sum(const T *const BQ_R__ src1, + const T *const BQ_R__ src2, + const int count) +{ + T result = T(); + for (int i = 0; i < count; ++i) { + result += src1[i] * src2[i]; + } + return result; +} + +/** * v_log * - * Replace each element in vector \arg dst, of length \arg count, with + * Replace each element in vector \arg srcdst, of length \arg count, with * its natural logarithm. */ template<typename T> -inline void v_log(T *const BQ_R__ dst, +inline void v_log(T *const BQ_R__ srcdst, const int count) { for (int i = 0; i < count; ++i) { - dst[i] = log(dst[i]); + srcdst[i] = log(srcdst[i]); } } #if defined HAVE_IPP template<> -inline void v_log(float *const BQ_R__ dst, +inline void v_log(float *const BQ_R__ srcdst, const int count) { - ippsLn_32f_I(dst, count); + ippsLn_32f_I(srcdst, count); } template<> -inline void v_log(double *const BQ_R__ dst, +inline void v_log(double *const BQ_R__ srcdst, const int count) { - ippsLn_64f_I(dst, count); + ippsLn_64f_I(srcdst, count); } #elif defined HAVE_VDSP // no in-place vForce functions for these -- can we use the @@ -728,50 +787,50 @@ // use an out-of-place one with temporary buffer and still be faster // than doing it any other way? template<> -inline void v_log(float *const BQ_R__ dst, +inline void v_log(float *const BQ_R__ srcdst, const int count) { float tmp[count]; - vvlogf(tmp, dst, &count); - v_copy(dst, tmp, count); + vvlogf(tmp, srcdst, &count); + v_copy(srcdst, tmp, count); } template<> -inline void v_log(double *const BQ_R__ dst, +inline void v_log(double *const BQ_R__ srcdst, const int count) { double tmp[count]; - vvlog(tmp, dst, &count); - v_copy(dst, tmp, count); + vvlog(tmp, srcdst, &count); + v_copy(srcdst, tmp, count); } -#endif +#endif // HAVE_VDSP /** * v_exp * - * Replace each element in vector \arg dst, of length \arg count, with + * Replace each element in vector \arg srcdst, of length \arg count, with * its base-e exponential. */ template<typename T> -inline void v_exp(T *const BQ_R__ dst, +inline void v_exp(T *const BQ_R__ srcdst, const int count) { for (int i = 0; i < count; ++i) { - dst[i] = exp(dst[i]); + srcdst[i] = exp(srcdst[i]); } } #if defined HAVE_IPP template<> -inline void v_exp(float *const BQ_R__ dst, +inline void v_exp(float *const BQ_R__ srcdst, const int count) { - ippsExp_32f_I(dst, count); + ippsExp_32f_I(srcdst, count); } template<> -inline void v_exp(double *const BQ_R__ dst, +inline void v_exp(double *const BQ_R__ srcdst, const int count) { - ippsExp_64f_I(dst, count); + ippsExp_64f_I(srcdst, count); } #elif defined HAVE_VDSP // no in-place vForce functions for these -- can we use the @@ -779,50 +838,50 @@ // use an out-of-place one with temporary buffer and still be faster // than doing it any other way? template<> -inline void v_exp(float *const BQ_R__ dst, +inline void v_exp(float *const BQ_R__ srcdst, const int count) { float tmp[count]; - vvexpf(tmp, dst, &count); - v_copy(dst, tmp, count); + vvexpf(tmp, srcdst, &count); + v_copy(srcdst, tmp, count); } template<> -inline void v_exp(double *const BQ_R__ dst, +inline void v_exp(double *const BQ_R__ srcdst, const int count) { double tmp[count]; - vvexp(tmp, dst, &count); - v_copy(dst, tmp, count); + vvexp(tmp, srcdst, &count); + v_copy(srcdst, tmp, count); } -#endif +#endif // HAVE_VDSP /** * v_sqrt * - * Replace each element in vector \arg dst, of length \arg count, with + * Replace each element in vector \arg srcdst, of length \arg count, with * its square root. */ template<typename T> -inline void v_sqrt(T *const BQ_R__ dst, +inline void v_sqrt(T *const BQ_R__ srcdst, const int count) { for (int i = 0; i < count; ++i) { - dst[i] = sqrt(dst[i]); + srcdst[i] = sqrt(srcdst[i]); } } #if defined HAVE_IPP template<> -inline void v_sqrt(float *const BQ_R__ dst, +inline void v_sqrt(float *const BQ_R__ srcdst, const int count) { - ippsSqrt_32f_I(dst, count); + ippsSqrt_32f_I(srcdst, count); } template<> -inline void v_sqrt(double *const BQ_R__ dst, +inline void v_sqrt(double *const BQ_R__ srcdst, const int count) { - ippsSqrt_64f_I(dst, count); + ippsSqrt_64f_I(srcdst, count); } #elif defined HAVE_VDSP // no in-place vForce functions for these -- can we use the @@ -830,95 +889,95 @@ // use an out-of-place one with temporary buffer and still be faster // than doing it any other way? template<> -inline void v_sqrt(float *const BQ_R__ dst, +inline void v_sqrt(float *const BQ_R__ srcdst, const int count) { float tmp[count]; - vvsqrtf(tmp, dst, &count); - v_copy(dst, tmp, count); + vvsqrtf(tmp, srcdst, &count); + v_copy(srcdst, tmp, count); } template<> -inline void v_sqrt(double *const BQ_R__ dst, +inline void v_sqrt(double *const BQ_R__ srcdst, const int count) { double tmp[count]; - vvsqrt(tmp, dst, &count); - v_copy(dst, tmp, count); + vvsqrt(tmp, srcdst, &count); + v_copy(srcdst, tmp, count); } -#endif +#endif // HAVE_VDSP /** * v_square * - * Replace each element in vector \arg dst, of length \arg count, with + * Replace each element in vector \arg srcdst, of length \arg count, with * its square. */ template<typename T> -inline void v_square(T *const BQ_R__ dst, +inline void v_square(T *const BQ_R__ srcdst, const int count) { for (int i = 0; i < count; ++i) { - dst[i] = dst[i] * dst[i]; + srcdst[i] = srcdst[i] * srcdst[i]; } } #if defined HAVE_IPP template<> -inline void v_square(float *const BQ_R__ dst, +inline void v_square(float *const BQ_R__ srcdst, const int count) { - ippsSqr_32f_I(dst, count); + ippsSqr_32f_I(srcdst, count); } template<> -inline void v_square(double *const BQ_R__ dst, +inline void v_square(double *const BQ_R__ srcdst, const int count) { - ippsSqr_64f_I(dst, count); + ippsSqr_64f_I(srcdst, count); } -#endif +#endif // HAVE_IPP /** * v_abs * - * Replace each element in vector \arg dst, of length \arg count, with + * Replace each element in vector \arg srcdst, of length \arg count, with * its absolute value. */ template<typename T> -inline void v_abs(T *const BQ_R__ dst, +inline void v_abs(T *const BQ_R__ srcdst, const int count) { for (int i = 0; i < count; ++i) { - dst[i] = fabs(dst[i]); + srcdst[i] = fabs(srcdst[i]); } } #if defined HAVE_IPP template<> -inline void v_abs(float *const BQ_R__ dst, +inline void v_abs(float *const BQ_R__ srcdst, const int count) { - ippsAbs_32f_I(dst, count); + ippsAbs_32f_I(srcdst, count); } template<> -inline void v_abs(double *const BQ_R__ dst, +inline void v_abs(double *const BQ_R__ srcdst, const int count) { - ippsAbs_64f_I(dst, count); + ippsAbs_64f_I(srcdst, count); } #elif defined HAVE_VDSP template<> -inline void v_abs(float *const BQ_R__ dst, +inline void v_abs(float *const BQ_R__ srcdst, const int count) { float tmp[count]; #if (defined(MACOSX_DEPLOYMENT_TARGET) && MACOSX_DEPLOYMENT_TARGET <= 1070 && MAC_OS_X_VERSION_MIN_REQUIRED <= 1070) - vvfabf(tmp, dst, &count); + vvfabf(tmp, srcdst, &count); #else - vvfabsf(tmp, dst, &count); + vvfabsf(tmp, srcdst, &count); #endif - v_copy(dst, tmp, count); + v_copy(srcdst, tmp, count); } -#endif +#endif // HAVE_VDSP /** * v_interleave @@ -959,6 +1018,8 @@ } #if defined HAVE_IPP +#if (IPP_VERSION_MAJOR <= 7) +// Deprecated in v8, removed in v9 template<> inline void v_interleave(float *const BQ_R__ dst, const float *const BQ_R__ *const BQ_R__ src, @@ -969,6 +1030,7 @@ } // IPP does not (currently?) provide double-precision interleave #endif +#endif // HAVE_IPP /** * v_deinterleave @@ -1009,6 +1071,8 @@ } #if defined HAVE_IPP +#if (IPP_VERSION_MAJOR <= 7) +// Deprecated in v8, removed in v9 template<> inline void v_deinterleave(float *const BQ_R__ *const BQ_R__ dst, const float *const BQ_R__ src, @@ -1019,6 +1083,7 @@ } // IPP does not (currently?) provide double-precision deinterleave #endif +#endif // HAVE_IPP /** * v_fftshift @@ -1043,12 +1108,13 @@ * v_mean * * Return the mean of the values in the vector \arg vec, of length - * \arg count. + * \arg count. For an empty vector, return 0.0. */ template<typename T> inline T v_mean(const T *const BQ_R__ vec, const int count) { T t = T(0); + if (count == 0) return t; for (int i = 0; i < count; ++i) { t += vec[i]; } @@ -1076,6 +1142,144 @@ return t; } +/** + * v_mix + * + * Mixdown N channels to 1 channel by simple averaging. + * + * Add the elements in the individual vectors in the set \arg in, + * each multiplied by the a constant factor 1 / \arg channels, and + * leave the results in \arg out. All vectors have length \arg count, + * and there are \arg channels vectors in the input set. + * + * Caller guarantees that all of the \arg in and \arg out vectors are + * non-overlapping with each other. + */ +template <typename T> +inline void v_mix(T *const BQ_R__ out, + const T *const BQ_R__ *const BQ_R__ in, + const int channels, + const int count) +{ + v_zero(out, count); + for (int c = 0; c < channels; ++c) { + v_add(out, in[c], count); + } + v_scale(out, T(1.0) / T(channels), count); +} + +/** + * v_reconfigure_channels + * + * Adapt a fixed number of frames from n to m channels. + * + * We vaguely assume these channel ordering conventions (from + * Vorbis/Opus docs): + * + * 1 channel = mono + * 2 = stereo + * 3 = l/c/r + * 4 = fl/fr/rl/rr + * 5 = fl/c/fr/rl/rr + * 6 (= 5.1) = fl/c/fr/rl/rr/lfe + * 7 (= 6.1) = fl/c/fr/sl/sr/rc/lfe + * 8 (= 7.1) = fl/c/fr/sl/sr/rl/rr/lfe + * >8 = application-specific + * + * (where l = left, c = centre, r = right, fl/fr = front l/r; sl/sr = + * side l/r; rl/rr = rear l/r; lfe = low-frequency effects channel) + * + * The reconfiguration rules are: + * -- if n == m, copy the input through unchanged + * -- else if m == 1, mixdown to mono by averaging all n input channels + * -- else if n == 1, duplicate the mono input across all m output channels + * -- else if m == 2 and n == 3 or n >= 5, take 1st and 3rd channels of input + * -- else if n == 2 and m == 3 or m >= 5, copy to 1st and 3rd, the rest silent + * -- else if n > m, take the first m channels of the input + * -- else take all n channels of the input and add m-n silent channels + * + * The aim here is to be simple and not unreasonable, to reconfigure, + * not to properly convert - that would involve more + * application-specific decisions. + */ +template<typename T> +inline void v_reconfigure_channels(T *const BQ_R__ *const BQ_R__ out, + const int m, /* out channel count */ + const T *const BQ_R__ *const BQ_R__ in, + const int n, /* in channel count */ + const int count) +{ + if (n == m) { + v_copy_channels(out, in, n, count); + } else if (m == 1) { + v_mix(out[0], in, n, count); + } else if (n == 1) { + for (int c = 0; c < m; ++c) { + v_copy(out[c], in[0], count); + } + } else if (m == 2 && (n == 3 || n >= 5)) { + v_copy(out[0], in[0], count); + v_copy(out[1], in[2], count); + } else if (n == 2 && (m == 3 || m >= 5)) { + v_copy(out[0], in[0], count); + v_zero(out[1], count); + v_copy(out[2], in[1], count); + for (int c = 3; c < m; ++c) { + v_zero(out[c], count); + } + } else { + int c = 0; + while (c < n && c < m) { + v_copy(out[c], in[c], count); + ++c; + } + while (c < m) { + v_zero(out[c], count); + ++c; + } + } +} + +/** + * v_reconfigure_channels_inplace + * + * Convert a fixed number of frames from n to m channels, operating + * in-place. That is, the input and output buffer arrays are the same, + * having space for max(n, m) channel arrays, and we read n channels + * from them and write back m. + * + * The rules are: + * -- if n >= m and m > 1, leave unchanged + * -- else if m == 1, mixdown to mono by averaging all n channels + * -- else if n == 1, duplicate the mono input across all m channels + * -- else leave first n channels and add m-n silent channels + */ +template<typename T> +inline void v_reconfigure_channels_inplace(T *const BQ_R__ *const BQ_R__ inout, + const int m, /* out channel count */ + const int n, /* in channel count */ + const int count) +{ + if (n >= m && m > 1) { + return; + } else if (m == 1) { + for (int c = 1; c < n; ++c) { + v_add(inout[0], inout[c], count); + } + v_scale(inout[0], T(1.0) / T(n), count); + } else if (n == 1) { + for (int c = 1; c < m; ++c) { + v_copy(inout[c], inout[0], count); + } + } else { + int c = n; + while (c < m) { + v_zero(inout[c], count); + ++c; + } + } +} + } #endif
--- a/bqvec/bqvec/VectorOpsComplex.h Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/bqvec/VectorOpsComplex.h Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2015 Particular Programs Ltd. + Copyright 2007-2016 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -61,7 +61,7 @@ } else { vDSP_vclrD((double *)ptr, 1, count * 2); } -#else +#else // ! HAVE_VDSP const bq_complex_element_t value = 0.0; for (int i = 0; i < count; ++i) { ptr[i].re = value; @@ -93,7 +93,7 @@ ippsMove_64fc((const Ipp64fc *)src, (Ipp64fc *)dst, count); } } -#endif +#endif // HAVE_IPP template<> inline void v_convert(bq_complex_t *const BQ_R__ dst, @@ -120,19 +120,19 @@ } } -inline void c_add(bq_complex_t &dst, +inline void c_add(bq_complex_t &srcdst, const bq_complex_t &src) { - dst.re += src.re; - dst.im += src.im; + srcdst.re += src.re; + srcdst.im += src.im; } -inline void c_add_with_gain(bq_complex_t &dst, +inline void c_add_with_gain(bq_complex_t &srcdst, const bq_complex_t &src, const bq_complex_element_t gain) { - dst.re += src.re * gain; - dst.im += src.im * gain; + srcdst.re += src.re * gain; + srcdst.im += src.im * gain; } inline void c_multiply(bq_complex_t &dst, @@ -159,75 +159,75 @@ dst.im = imag; } -inline void c_multiply(bq_complex_t &dst, +inline void c_multiply(bq_complex_t &srcdst, const bq_complex_t &src) { - c_multiply(dst, dst, src); + c_multiply(srcdst, srcdst, src); } -inline void c_multiply_and_add(bq_complex_t &dst, +inline void c_multiply_and_add(bq_complex_t &srcdst, const bq_complex_t &src1, const bq_complex_t &src2) { bq_complex_t tmp; c_multiply(tmp, src1, src2); - c_add(dst, tmp); + c_add(srcdst, tmp); } template<> -inline void v_add(bq_complex_t *const BQ_R__ dst, +inline void v_add(bq_complex_t *const BQ_R__ srcdst, const bq_complex_t *const BQ_R__ src, const int count) { #if defined HAVE_IPP if (sizeof(bq_complex_element_t) == sizeof(float)) { - ippsAdd_32fc_I((Ipp32fc *)src, (Ipp32fc *)dst, count); + ippsAdd_32fc_I((Ipp32fc *)src, (Ipp32fc *)srcdst, count); } else { - ippsAdd_64fc_I((Ipp64fc *)src, (Ipp64fc *)dst, count); + ippsAdd_64fc_I((Ipp64fc *)src, (Ipp64fc *)srcdst, count); } #else for (int i = 0; i < count; ++i) { - dst[i].re += src[i].re; - dst[i].im += src[i].im; + srcdst[i].re += src[i].re; + srcdst[i].im += src[i].im; } -#endif +#endif // HAVE_IPP } template<> -inline void v_add_with_gain(bq_complex_t *const BQ_R__ dst, +inline void v_add_with_gain(bq_complex_t *const BQ_R__ srcdst, const bq_complex_t *const BQ_R__ src, const bq_complex_element_t gain, const int count) { for (int i = 0; i < count; ++i) { - dst[i].re += src[i].re * gain; - dst[i].im += src[i].im * gain; + srcdst[i].re += src[i].re * gain; + srcdst[i].im += src[i].im * gain; } } template<> -inline void v_multiply(bq_complex_t *const BQ_R__ dst, +inline void v_multiply(bq_complex_t *const BQ_R__ srcdst, const bq_complex_t *const BQ_R__ src, const int count) { #ifdef HAVE_IPP if (sizeof(bq_complex_element_t) == sizeof(float)) { - ippsMul_32fc_I((const Ipp32fc *)src, (Ipp32fc *)dst, count); + ippsMul_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count); } else { - ippsMul_64fc_I((const Ipp64fc *)src, (Ipp64fc *)dst, count); + ippsMul_64fc_I((const Ipp64fc *)src, (Ipp64fc *)srcdst, count); } #else for (int i = 0; i < count; ++i) { - c_multiply(dst[i], src[i]); + c_multiply(srcdst[i], src[i]); } -#endif +#endif // HAVE_IPP } template<> -inline void v_multiply(bq_complex_t *const BQ_R__ dst, - const bq_complex_t *const BQ_R__ src1, - const bq_complex_t *const BQ_R__ src2, - const int count) +inline void v_multiply_to(bq_complex_t *const BQ_R__ dst, + const bq_complex_t *const BQ_R__ src1, + const bq_complex_t *const BQ_R__ src2, + const int count) { #ifdef HAVE_IPP if (sizeof(bq_complex_element_t) == sizeof(float)) { @@ -241,11 +241,11 @@ for (int i = 0; i < count; ++i) { c_multiply(dst[i], src1[i], src2[i]); } -#endif +#endif // HAVE_IPP } template<> -inline void v_multiply_and_add(bq_complex_t *const BQ_R__ dst, +inline void v_multiply_and_add(bq_complex_t *const BQ_R__ srcdst, const bq_complex_t *const BQ_R__ src1, const bq_complex_t *const BQ_R__ src2, const int count) @@ -253,16 +253,16 @@ #ifdef HAVE_IPP if (sizeof(bq_complex_element_t) == sizeof(float)) { ippsAddProduct_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2, - (Ipp32fc *)dst, count); + (Ipp32fc *)srcdst, count); } else { ippsAddProduct_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2, - (Ipp64fc *)dst, count); + (Ipp64fc *)srcdst, count); } #else for (int i = 0; i < count; ++i) { - c_multiply_and_add(dst[i], src1[i], src2[i]); + c_multiply_and_add(srcdst[i], src1[i], src2[i]); } -#endif +#endif // HAVE_IPP } #if defined( __GNUC__ ) && defined( _WIN32 ) @@ -279,7 +279,7 @@ } #endif -#endif /* !NO_COMPLEX_TYPES */ +#endif // !NO_COMPLEX_TYPES template<typename T> inline void c_phasor(T *real, T *imag, T phase) @@ -302,8 +302,12 @@ *imag = sin(phase); } #elif defined __GNUC__ +#if defined __APPLE__ +#define sincos __sincos +#define sincosf __sincosf +#endif if (sizeof(T) == sizeof(float)) { - sincosf(phase, (float *)imag, (float *)real); + sincosf(float(phase), (float *)imag, (float *)real); } else { sincos(phase, (double *)imag, (double *)real); } @@ -335,7 +339,7 @@ *phase = atan; *mag = sqrtf(real * real + imag * imag); } -#else +#else // !USE_APPROXIMATE_ATAN2 template<> inline void c_magphase(float *mag, float *phase, float real, float imag) { @@ -368,15 +372,10 @@ const bq_complex_element_t *const BQ_R__ src, const int count); -inline void v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag, - bq_complex_element_t *const BQ_R__ phase, - const bq_complex_t *const BQ_R__ src, - const int count) -{ - for (int i = 0; i < count; ++i) { - c_magphase<bq_complex_element_t>(mag + i, phase + i, src[i].re, src[i].im); - } -} +void v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag, + bq_complex_element_t *const BQ_R__ phase, + const bq_complex_t *const BQ_R__ src, + const int count); inline void v_cartesian_to_polar_interleaved(bq_complex_element_t *const BQ_R__ dst, const bq_complex_t *const BQ_R__ src, @@ -384,11 +383,15 @@ { for (int i = 0; i < count; ++i) { c_magphase<bq_complex_element_t>(&dst[i*2], &dst[i*2+1], - src[i].re, src[i].im); + src[i].re, src[i].im); } } -#endif /* !NO_COMPLEX_TYPES */ +void v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag, + const bq_complex_t *const BQ_R__ src, + const int count); + +#endif // !NO_COMPLEX_TYPES template<typename S, typename T> // S source, T target void v_polar_to_cartesian(T *const BQ_R__ real, @@ -434,7 +437,47 @@ } } -#if defined USE_POMMIER_MATHFUN +#ifdef HAVE_IPP +template<> +inline void v_polar_to_cartesian(float *const BQ_R__ real, + float *const BQ_R__ imag, + const float *const BQ_R__ mag, + const float *const BQ_R__ phase, + const int count) +{ + ippsPolarToCart_32f(mag, phase, real, imag, count); +} + +template<> +inline void v_polar_to_cartesian(double *const BQ_R__ real, + double *const BQ_R__ imag, + const double *const BQ_R__ mag, + const double *const BQ_R__ phase, + const int count) +{ + ippsPolarToCart_64f(mag, phase, real, imag, count); +} + +template<> +inline void v_polar_to_cartesian_interleaved(float *const BQ_R__ dst, + const float *const BQ_R__ mag, + const float *const BQ_R__ phase, + const int count) +{ + ippsPolarToCart_32fc(mag, phase, (Ipp32fc *)dst, count); +} + +template<> +inline void v_polar_to_cartesian_interleaved(double *const BQ_R__ dst, + const double *const BQ_R__ mag, + const double *const BQ_R__ phase, + const int count) +{ + ippsPolarToCart_64fc(mag, phase, (Ipp64fc *)dst, count); +} + +#elif defined USE_POMMIER_MATHFUN + void v_polar_to_cartesian_pommier(float *const BQ_R__ real, float *const BQ_R__ imag, const float *const BQ_R__ mag, @@ -472,8 +515,7 @@ { v_polar_to_cartesian_interleaved_pommier(dst, mag, phase, count); } - -#endif +#endif // USE_POMMIER_MATHFUN template<typename S, typename T> // S source, T target void v_cartesian_to_polar(T *const BQ_R__ mag, @@ -498,7 +540,48 @@ } } -#ifdef HAVE_VDSP +#ifdef HAVE_IPP + +template<> +inline void v_cartesian_to_polar(float *const BQ_R__ mag, + float *const BQ_R__ phase, + const float *const BQ_R__ real, + const float *const BQ_R__ imag, + const int count) +{ + ippsCartToPolar_32f(real, imag, mag, phase, count); +} + +template<> +inline void v_cartesian_interleaved_to_polar(float *const BQ_R__ mag, + float *const BQ_R__ phase, + const float *const BQ_R__ src, + const int count) +{ + ippsCartToPolar_32fc((const Ipp32fc *)src, mag, phase, count); +} + +template<> +inline void v_cartesian_to_polar(double *const BQ_R__ mag, + double *const BQ_R__ phase, + const double *const BQ_R__ real, + const double *const BQ_R__ imag, + const int count) +{ + ippsCartToPolar_64f(real, imag, mag, phase, count); +} + +template<> +inline void v_cartesian_interleaved_to_polar(double *const BQ_R__ mag, + double *const BQ_R__ phase, + const double *const BQ_R__ src, + const int count) +{ + ippsCartToPolar_64fc((const Ipp64fc *)src, mag, phase, count); +} + +#elif defined HAVE_VDSP + template<> inline void v_cartesian_to_polar(float *const BQ_R__ mag, float *const BQ_R__ phase, @@ -513,6 +596,7 @@ vvsqrtf(mag, phase, &count); // using phase as the source vvatan2f(phase, imag, real, &count); } + template<> inline void v_cartesian_to_polar(double *const BQ_R__ mag, double *const BQ_R__ phase, @@ -528,7 +612,7 @@ vvsqrt(mag, phase, &count); // using phase as the source vvatan2(phase, imag, real, &count); } -#endif +#endif // HAVE_VDSP template<typename T> void v_cartesian_to_polar_interleaved_inplace(T *const BQ_R__ srcdst, @@ -542,6 +626,63 @@ } } +template<typename S, typename T> // S source, T target +void v_cartesian_to_magnitudes(T *const BQ_R__ mag, + const S *const BQ_R__ real, + const S *const BQ_R__ imag, + const int count) +{ + for (int i = 0; i < count; ++i) { + mag[i] = T(sqrt(real[i] * real[i] + imag[i] * imag[i])); + } +} + +template<typename S, typename T> // S source, T target +void v_cartesian_interleaved_to_magnitudes(T *const BQ_R__ mag, + const S *const BQ_R__ src, + const int count) +{ + for (int i = 0; i < count; ++i) { + mag[i] = T(sqrt(src[i*2] * src[i*2] + src[i*2+1] * src[i*2+1])); + } +} + +#ifdef HAVE_IPP +template<> +inline void v_cartesian_to_magnitudes(float *const BQ_R__ mag, + const float *const BQ_R__ real, + const float *const BQ_R__ imag, + const int count) +{ + ippsMagnitude_32f(real, imag, mag, count); +} + +template<> +inline void v_cartesian_to_magnitudes(double *const BQ_R__ mag, + const double *const BQ_R__ real, + const double *const BQ_R__ imag, + const int count) +{ + ippsMagnitude_64f(real, imag, mag, count); +} + +template<> +inline void v_cartesian_interleaved_to_magnitudes(float *const BQ_R__ mag, + const float *const BQ_R__ src, + const int count) +{ + ippsMagnitude_32fc((const Ipp32fc *)src, mag, count); +} + +template<> +inline void v_cartesian_interleaved_to_magnitudes(double *const BQ_R__ mag, + const double *const BQ_R__ src, + const int count) +{ + ippsMagnitude_64fc((const Ipp64fc *)src, mag, count); +} +#endif + } #endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/build/Makefile.inc Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,74 @@ + +SRC_DIR := src +TEST_DIR := test +HEADER_DIR := bqvec + +SOURCES := $(wildcard $(SRC_DIR)/*.cpp) +HEADERS := $(wildcard $(HEADER_DIR)/*.h) $(wildcard $(SRC_DIR)/*.h) + +OBJECTS := $(SOURCES:.cpp=.o) +OBJECTS := $(OBJECTS:.c=.o) + +TIMINGS_SOURCES := $(TEST_DIR)/Timings.cpp +TIMINGS_OBJECTS := $(TIMINGS_SOURCES:.cpp=.o) + +TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) +TEST_OBJECTS := $(TEST_SOURCES:.cpp=.o) + +OPTFLAGS := -O3 -ffast-math + +CXXFLAGS := -std=c++98 -fpic -Wall -Wextra -Werror $(VECTOR_DEFINES) $(ALLOCATOR_DEFINES) -I. $(THIRD_PARTY_INCLUDES) -I$(HEADER_DIR) $(OPTFLAGS) + +LIBRARY := libbqvec.a + +all: $(LIBRARY) timings + +test: $(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex + ./test-allocators && ./test-vectorops && ./test-vectorops-complex + +valgrind: $(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex + valgrind ./test-allocators && valgrind ./test-vectorops && valgrind ./test-vectorops-complex + +$(LIBRARY): $(OBJECTS) + $(AR) rc $@ $^ + +timings: $(TIMINGS_OBJECTS) $(LIBRARY) + $(CXX) $(CXXFLAGS) -o $@ $^ $(THIRD_PARTY_LIBS) + +test-allocators: test/TestAllocators.o $(LIBRARY) + $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS) + +test-vectorops: test/TestVectorOps.o $(LIBRARY) + $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS) + +test-vectorops-complex: test/TestVectorOpsComplex.o $(LIBRARY) + $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS) + +clean: + rm -f $(OBJECTS) $(TEST_OBJECTS) $(TIMINGS_OBJECTS) + +distclean: clean + rm -f $(LIBRARY) test-allocators test-vectorops test-vectorops-complex + +depend: + makedepend -Y -fbuild/Makefile.inc $(TIMINGS_SOURCES) $(TEST_SOURCES) $(HEADERS) $(SOURCES) + + +# DO NOT DELETE + +test/Timings.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h bqvec/Restrict.h +test/Timings.o: bqvec/ComplexTypes.h +test/Timings.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h bqvec/Restrict.h +test/Timings.o: bqvec/ComplexTypes.h +test/TestVectorOpsComplex.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h +test/TestVectorOpsComplex.o: bqvec/Restrict.h bqvec/ComplexTypes.h +test/TestVectorOpsComplex.o: bqvec/VectorOps.h +test/TestVectorOps.o: bqvec/VectorOps.h +test/TestAllocators.o: bqvec/VectorOps.h bqvec/Allocators.h +bqvec/RingBuffer.o: bqvec/Barrier.h bqvec/Allocators.h bqvec/Restrict.h +bqvec/RingBuffer.o: bqvec/VectorOps.h +bqvec/VectorOpsComplex.o: bqvec/VectorOps.h bqvec/Restrict.h +bqvec/VectorOpsComplex.o: bqvec/ComplexTypes.h +bqvec/VectorOps.o: bqvec/Restrict.h +src/Allocators.o: bqvec/Allocators.h +src/Barrier.o: bqvec/Barrier.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/build/Makefile.linux Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,9 @@ + +VECTOR_DEFINES := + +THIRD_PARTY_INCLUDES := +THIRD_PARTY_LIBS := + +ALLOCATOR_DEFINES := -DHAVE_POSIX_MEMALIGN + +include build/Makefile.inc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/build/Makefile.linux.ipp Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,10 @@ + +VECTOR_DEFINES := -DHAVE_IPP + +ALLOCATOR_DEFINES := -DHAVE_POSIX_MEMALIGN + +THIRD_PARTY_INCLUDES := -I/opt/intel/ipp/include +THIRD_PARTY_LIBS := -L/opt/intel/ipp/lib/intel64_lin -Wl,-Bstatic -lipps -lippvm -lippcore -Wl,-Bdynamic + +include build/Makefile.inc +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/build/Makefile.linux.min Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,13 @@ + +# This Makefile resembles the sort of settings one might use on a +# small device without library support. It's mainly here as another CI +# test target that is quite different from the default configuration + +VECTOR_DEFINES := -DNO_EXCEPTIONS -DUSE_SINGLE_PRECISION_COMPLEX -DUSE_POMMIER_MATHFUN -DUSE_APPROXIMATE_ATAN2 + +ALLOCATOR_DEFINES := -DUSE_OWN_ALIGNED_MALLOC -DLACK_POSIX_MEMALIGN -DMALLOC_IS_NOT_ALIGNED + +THIRD_PARTY_INCLUDES := +THIRD_PARTY_LIBS := + +include build/Makefile.inc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/build/Makefile.osx Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,9 @@ + +VECTOR_DEFINES := -DHAVE_VDSP + +ALLOCATOR_DEFINES := -DMALLOC_IS_ALIGNED + +THIRD_PARTY_INCLUDES := +THIRD_PARTY_LIBS := -framework Accelerate + +include build/Makefile.inc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/build/run-platform-tests.sh Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,84 @@ +#!/bin/bash + +if [ -z "$1" ]; then + echo "Usage: $0 <platformtag>" + exit 2 +fi + +platformtag="$1" + +set -eu + +ippdir=/opt/intel/ipp + +echo +if [ -d "$ippdir" ]; then + echo "Found IPP directory $ippdir, considering IPP as well as other options" +else + echo "No IPP directory $ippdir, not testing with IPP" +fi + +if valgrind --version >/dev/null 2>&1 ; +then + have_valgrind=yes +else + echo + echo "No valgrind executable found, not using valgrind" + have_valgrind=no +fi + +tmpfile=$(mktemp "/tmp/test_XXXXXX") +trap "rm -f $tmpfile" 0 + +run() { + successtext="$1" + shift + echo -n "Running \"$@\"..." + if "$@" > "$tmpfile" 2>&1 ; then + if [ -z "$successtext" ] || fgrep -q "$successtext" "$tmpfile" ; then + echo " OK" + return 0 + else + echo " Failed" + cat "$tmpfile" + return 1 + fi + else + echo " Failed" + cat "$tmpfile" + return 1 + fi +} + +for mf in Makefile build/Makefile.$platformtag build/Makefile.$platformtag.* ; do + + case "$mf" in + *~) continue;; + *.bak) continue;; + *ipp) + if [ ! -d "$ippdir" ]; then + continue + fi;; + esac + + if [ ! -f "$mf" ]; then + continue + fi + + echo + echo "Building and testing with $mf:" + echo + + make -f "$mf" clean >/dev/null + run "No errors detected" make -f "$mf" test + + if [ "$have_valgrind" = "yes" ]; then + for t in test-* ; do + if [ -x "$t" ]; then + run "no leaks are possible" valgrind --leak-check=full ./"$t" + fi + done + fi +done + +
--- a/bqvec/pommier/sse_mathfun.h Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/pommier/sse_mathfun.h Tue Nov 19 10:13:32 2019 +0000 @@ -61,7 +61,7 @@ #define _PI32_CONST(Name, Val) \ static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val } #define _PS_CONST_TYPE(Name, Type, Val) \ - static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val } + static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { (Type)Val, (Type)Val, (Type)Val, (Type)Val } _PS_CONST(1 , 1.0f); _PS_CONST(0p5, 0.5f);
--- a/bqvec/src/Allocators.cpp Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/src/Allocators.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2014 Particular Programs Ltd. + Copyright 2007-2016 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -33,13 +33,15 @@ Software without prior written authorization. */ -#include "bqvec/Allocators.h" +#include "Allocators.h" #ifdef HAVE_IPP #include <ipps.h> #endif #include <iostream> +#include <climits> + using std::cerr; using std::endl; @@ -50,16 +52,52 @@ template <> float *allocate(size_t count) { - float *ptr = ippsMalloc_32f(count); - if (!ptr) throw (std::bad_alloc()); + if (count > INT_MAX) { +#ifndef NO_EXCEPTIONS + throw std::length_error("Size overflow in allocate"); +#else + abort(); +#endif + } + + float *ptr = ippsMalloc_32f(int(count)); + if (!ptr) { +#ifndef NO_EXCEPTIONS + throw (std::bad_alloc()); +#else + abort(); +#endif + } + + for (size_t i = 0; i < count; ++i) { + new (ptr + i) float; + } return ptr; } template <> double *allocate(size_t count) { - double *ptr = ippsMalloc_64f(count); - if (!ptr) throw (std::bad_alloc()); + if (count > INT_MAX) { +#ifndef NO_EXCEPTIONS + throw std::length_error("Size overflow in allocate"); +#else + abort(); +#endif + } + + double *ptr = ippsMalloc_64f(int(count)); + if (!ptr) { +#ifndef NO_EXCEPTIONS + throw (std::bad_alloc()); +#else + abort(); +#endif + } + + for (size_t i = 0; i < count; ++i) { + new (ptr + i) double; + } return ptr; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/src/Barrier.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,80 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + bqvec + + A small library for vector arithmetic and allocation in C++ using + raw C pointer arrays. + + Copyright 2007-2018 Particular Programs Ltd. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the names of Chris Cannam and + Particular Programs Ltd shall not be used in advertising or + otherwise to promote the sale, use or other dealings in this + Software without prior written authorization. +*/ + +#include "Barrier.h" + +#if defined __APPLE__ +#if !defined __MAC_10_12 +#include <libkern/OSAtomic.h> +#endif +#endif +#if defined _WIN32 && defined _MSC_VER +#include <Windows.h> +#endif + +namespace breakfastquay { + +void system_memorybarrier() +{ +#if defined __APPLE__ + +#if defined __MAC_10_12 + __sync_synchronize(); +#else + OSMemoryBarrier(); +#endif + +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) + + __sync_synchronize(); + +#elif defined _WIN32 + +#if defined _MSC_VER + MemoryBarrier(); +#else /* (mingw) */ + LONG Barrier = 0; + __asm__ __volatile__("xchgl %%eax,%0 " + : "=r" (Barrier)); +#endif + +#else +#warning "No memory barrier defined" +#endif + +} + +} +
--- a/bqvec/src/VectorOpsComplex.cpp Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/src/VectorOpsComplex.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -6,7 +6,7 @@ A small library for vector arithmetic and allocation in C++ using raw C pointer arrays. - Copyright 2007-2015 Particular Programs Ltd. + Copyright 2007-2016 Particular Programs Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -33,15 +33,10 @@ Software without prior written authorization. */ -#include "bqvec/VectorOpsComplex.h" +#include "VectorOpsComplex.h" #include <cassert> -#ifdef __MSVC__ -#include <malloc.h> -#define alloca _alloca -#endif - #if defined USE_POMMIER_MATHFUN #if defined __ARMEL__ #include "pommier/neon_mathfun.h" @@ -50,10 +45,18 @@ #endif #endif -#ifdef __MSVC__ +#if defined(_MSC_VER) || defined(WIN32) #include <malloc.h> +#ifndef alloca #define alloca _alloca #endif +#else +#include <alloca.h> +#endif + +#include <iostream> + +using namespace std; namespace breakfastquay { @@ -86,6 +89,8 @@ if (imag < 0.f) atan -= pi; } } + + return atan; } #endif @@ -216,8 +221,6 @@ #ifndef NO_COMPLEX_TYPES -#if defined HAVE_IPP - void v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, const bq_complex_element_t *const BQ_R__ mag, @@ -225,58 +228,52 @@ const int count) { if (sizeof(bq_complex_element_t) == sizeof(float)) { - ippsPolarToCart_32fc((const float *)mag, (const float *)phase, - (Ipp32fc *)dst, count); + v_polar_to_cartesian_interleaved((float *)dst, + (const float *)mag, + (const float *)phase, + count); } else { - ippsPolarToCart_64fc((const double *)mag, (const double *)phase, - (Ipp64fc *)dst, count); + v_polar_to_cartesian_interleaved((double *)dst, + (const double *)mag, + (const double *)phase, + count); } } -#elif defined HAVE_VDSP +void +v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag, + bq_complex_element_t *const BQ_R__ phase, + const bq_complex_t *const BQ_R__ src, + const int count) +{ + if (sizeof(bq_complex_element_t) == sizeof(float)) { + v_cartesian_interleaved_to_polar((float *)mag, + (float *)phase, + (const float *)src, + count); + } else { + v_cartesian_interleaved_to_polar((double *)mag, + (double *)phase, + (const double *)src, + count); + } +} void -v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, - const bq_complex_element_t *const BQ_R__ mag, - const bq_complex_element_t *const BQ_R__ phase, - const int count) +v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag, + const bq_complex_t *const BQ_R__ src, + const int count) { - bq_complex_element_t *sc = (bq_complex_element_t *) - alloca(count * 2 * sizeof(bq_complex_element_t)); - if (sizeof(bq_complex_element_t) == sizeof(float)) { - vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count); + v_cartesian_interleaved_to_magnitudes((float *)mag, + (const float *)src, + count); } else { - vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count); + v_cartesian_interleaved_to_magnitudes((double *)mag, + (const double *)src, + count); } - - int sini = 0; - int cosi = count; - - for (int i = 0; i < count; ++i) { - dst[i].re = mag[i] * sc[cosi++]; - dst[i].im = mag[i] * sc[sini++]; - } -} - -#else - -void -v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst, - const bq_complex_element_t *const BQ_R__ mag, - const bq_complex_element_t *const BQ_R__ phase, - const int count) -{ - for (int i = 0; i < count; ++i) { - dst[i] = c_phasor(phase[i]); - } - for (int i = 0; i < count; ++i) { - dst[i].re *= mag[i]; - dst[i].im *= mag[i]; - } -} - -#endif +} #if defined USE_POMMIER_MATHFUN @@ -356,7 +353,6 @@ v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst, const int count) { - // Not ideal bq_complex_element_t mag, phase; int ii = 0, io = 0; for (int i = 0; i < count; ++i) { @@ -368,6 +364,6 @@ } } -#endif +#endif // NO_COMPLEX_TYPES }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/test/TestAllocators.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,85 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "bqvec/VectorOps.h" +#include "bqvec/Allocators.h" + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MAIN + +#include <boost/test/unit_test.hpp> + +#include <stdexcept> +#include <vector> + +using namespace breakfastquay; + +BOOST_AUTO_TEST_SUITE(TestAllocators) + +#define COMPARE_ARRAY(a, b) \ + for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14); \ + } + +#define COMPARE_N(a, b, n) \ + for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14); \ + } + +BOOST_AUTO_TEST_CASE(alloc_dealloc) +{ + double *v = allocate<double>(4); + v[0] = 0.1; + v[1] = 2.0; + v[2] = -0.3; + v[3] = 4.0; + double *e = allocate<double>(4); + e[0] = -0.3; + e[1] = 4.0; + e[2] = 0.1; + e[3] = 2.0; + v_fftshift(v, 4); + COMPARE_N(v, e, 4); + deallocate(v); + deallocate(e); +} + +BOOST_AUTO_TEST_CASE(alloc_zero) +{ + double *v = allocate_and_zero<double>(4); + BOOST_CHECK_EQUAL(v[0], 0.f); + BOOST_CHECK_EQUAL(v[1], 0.f); + BOOST_CHECK_EQUAL(v[2], 0.f); + BOOST_CHECK_EQUAL(v[3], 0.f); + deallocate(v); +} + +BOOST_AUTO_TEST_CASE(alloc_dealloc_channels) +{ + double **v = allocate_channels<double>(2, 4); + v[0][0] = 0.1; + v[0][1] = 2.0; + v[0][2] = -0.3; + v[0][3] = 4.0; + v[1][0] = -0.3; + v[1][1] = 4.0; + v[1][2] = 0.1; + v[1][3] = 2.0; + v_fftshift(v[0], 4); + COMPARE_N(v[0], v[1], 4); + deallocate_channels(v, 2); +} + +BOOST_AUTO_TEST_CASE(stl) +{ + std::vector<double, StlAllocator<double> > v; + v.push_back(0.1); + v.push_back(2.0); + v.push_back(-0.3); + v.push_back(4.0); + double e[] = { -0.3, 4.0, 0.1, 2.0 }; + v_fftshift(v.data(), 4); + COMPARE_N(v.data(), e, 4); +} + +BOOST_AUTO_TEST_SUITE_END() +
--- a/bqvec/test/TestVectorOps.cpp Sat Nov 12 09:59:34 2016 +0000 +++ b/bqvec/test/TestVectorOps.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -1,254 +1,487 @@ /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ -#include "VectorOpsComplex.h" +#include "bqvec/VectorOps.h" -#include <iostream> -#include <cstdlib> +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MAIN -#include <time.h> +#include <boost/test/unit_test.hpp> -using namespace std; +#include <stdexcept> +#include <vector> -namespace breakfastquay { +using namespace breakfastquay; -namespace Test { +BOOST_AUTO_TEST_SUITE(TestVectorOps) -#ifdef _WIN32 -#define drand48() (-1+2*((float)rand())/RAND_MAX) -#endif - -bool -testMultiply() -{ - cerr << "testVectorOps: testing v_multiply complex" << endl; - - const int N = 1024; - turbot_complex_sample_t target[N]; - turbot_complex_sample_t src1[N]; - turbot_complex_sample_t src2[N]; - - for (int i = 0; i < N; ++i) { - src1[i].re = drand48(); - src1[i].im = drand48(); - src2[i].re = drand48(); - src2[i].im = drand48(); +#define COMPARE_ARRAY(a, b) \ + for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14); \ } - turbot_sample_t mean, first, last, total = 0; - for (int i = 0; i < N; ++i) { - turbot_complex_sample_t result; - c_multiply(result, src1[i], src2[i]); - if (i == 0) first = result.re; - if (i == N-1) last = result.im; - total += result.re; - total += result.im; +#define COMPARE_N(a, b, n) \ + for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14); \ } - mean = total / (N*2); - cerr << "Naive method: mean = " << mean << ", first = " << first - << ", last = " << last << endl; - v_multiply(target, src1, src2, N); - total = 0; - - for (int i = 0; i < N; ++i) { - if (i == 0) first = target[i].re; - if (i == N-1) last = target[i].im; - total += target[i].re; - total += target[i].im; - } - mean = total / (N*2); - cerr << "v_multiply: mean = " << mean << ", first = " << first - << ", last = " << last << endl; - - int iterations = 50000; - cerr << "Iterations: " << iterations << endl; - - cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; - float divisor = float(CLOCKS_PER_SEC) / 1000.f; - - clock_t start = clock(); - - for (int j = 0; j < iterations; ++j) { - for (int i = 0; i < N; ++i) { - c_multiply(target[i], src1[i], src2[i]); - } - } - - clock_t end = clock(); - - cerr << "Time for naive method: " << float(end - start)/divisor << endl; - - start = clock(); - - for (int j = 0; j < iterations; ++j) { - v_multiply(target, src1, src2, N); - } - - end = clock(); - - cerr << "Time for v_multiply: " << float(end - start)/divisor << endl; - - return true; +BOOST_AUTO_TEST_CASE(add) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double expected[] = { 0.0, 5.0, -1.5 }; + v_add(a, b, 3); + COMPARE_N(a, expected, 3); } -bool -testPolarToCart() +BOOST_AUTO_TEST_CASE(add_with_gain) { - cerr << "testVectorOps: testing v_polar_to_cartesian" << endl; - - const int N = 1024; - turbot_complex_sample_t target[N]; - turbot_sample_t mag[N]; - turbot_sample_t phase[N]; - - for (int i = 0; i < N; ++i) { - mag[i] = drand48(); - phase[i] = (drand48() * M_PI * 2) - M_PI; - } - - turbot_sample_t mean, first, last, total = 0; - for (int i = 0; i < N; ++i) { - turbot_sample_t real = mag[i] * cos(phase[i]); - turbot_sample_t imag = mag[i] * sin(phase[i]); - if (i == 0) first = real; - if (i == N-1) last = imag; - total += real; - total += imag; - } - mean = total / (N*2); - cerr << "Naive method: mean = " << mean << ", first = " << first - << ", last = " << last << endl; - - v_polar_to_cartesian(target, mag, phase, N); - - total = 0; - - for (int i = 0; i < N; ++i) { - if (i == 0) first = target[i].re; - if (i == N-1) last = target[i].im; - total += target[i].re; - total += target[i].im; - } - mean = total / (N*2); - cerr << "v_polar_to_cartesian: mean = " << mean << ", first = " << first - << ", last = " << last << endl; - - int iterations = 10000; - cerr << "Iterations: " << iterations << endl; - - cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; - float divisor = float(CLOCKS_PER_SEC) / 1000.f; - - clock_t start = clock(); - - for (int j = 0; j < iterations; ++j) { - for (int i = 0; i < N; ++i) { - target[i].re = mag[i] * cos(phase[i]); - target[i].im = mag[i] * sin(phase[i]); - } - } - - clock_t end = clock(); - - cerr << "Time for naive method: " << float(end - start)/divisor << endl; - - start = clock(); - - for (int j = 0; j < iterations; ++j) { - v_polar_to_cartesian(target, mag, phase, N); - } - - end = clock(); - - cerr << "Time for v_polar_to_cartesian: " << float(end - start)/divisor << endl; - - return true; + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double expected[] = { -0.5, 6.5, -3.75 }; + v_add_with_gain(a, b, 1.5, 3); + COMPARE_N(a, expected, 3); } -bool -testPolarToCartInterleaved() +BOOST_AUTO_TEST_CASE(subtract) { - cerr << "testVectorOps: testing v_polar_interleaved_to_cartesian" << endl; - - const int N = 1024; - turbot_complex_sample_t target[N]; - turbot_sample_t source[N*2]; - - for (int i = 0; i < N; ++i) { - source[i*2] = drand48(); - source[i*2+1] = (drand48() * M_PI * 2) - M_PI; - } - - turbot_sample_t mean, first, last, total = 0; - for (int i = 0; i < N; ++i) { - turbot_sample_t real = source[i*2] * cos(source[i*2+1]); - turbot_sample_t imag = source[i*2] * sin(source[i*2+1]); - if (i == 0) first = real; - if (i == N-1) last = imag; - total += real; - total += imag; - } - mean = total / (N*2); - cerr << "Naive method: mean = " << mean << ", first = " << first - << ", last = " << last << endl; - - v_polar_interleaved_to_cartesian(target, source, N); - - total = 0; - - for (int i = 0; i < N; ++i) { - if (i == 0) first = target[i].re; - if (i == N-1) last = target[i].im; - total += target[i].re; - total += target[i].im; - } - mean = total / (N*2); - cerr << "v_polar_interleaved_to_cartesian: mean = " << mean << ", first = " << first - << ", last = " << last << endl; - - int iterations = 10000; - cerr << "Iterations: " << iterations << endl; - - cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; - float divisor = float(CLOCKS_PER_SEC) / 1000.f; - - clock_t start = clock(); - - for (int j = 0; j < iterations; ++j) { - for (int i = 0; i < N; ++i) { - target[i].re = source[i*2] * cos(source[i*2+1]); - target[i].im = source[i*2] * sin(source[i*2+1]); - } - } - - clock_t end = clock(); - - cerr << "Time for naive method: " << float(end - start)/divisor << endl; - - start = clock(); - - for (int j = 0; j < iterations; ++j) { - v_polar_interleaved_to_cartesian(target, source, N); - } - - end = clock(); - - cerr << "Time for v_polar_interleaved_to_cartesian: " << float(end - start)/divisor << endl; - - return true; + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double expected[] = { 2.0, -1.0, 7.5 }; + v_subtract(a, b, 3); + COMPARE_N(a, expected, 3); } -bool -testVectorOps() +BOOST_AUTO_TEST_CASE(increment) { - if (!testMultiply()) return false; - if (!testPolarToCart()) return false; - if (!testPolarToCartInterleaved()) return false; - - return true; + double a[] = { -1.0, 3.0, -4.5 }; + double incr = -0.5; + double expected[] = { -1.5, 2.5, -5.0 }; + v_increment(a, incr, 3); + COMPARE_N(a, expected, 3); } +BOOST_AUTO_TEST_CASE(scale) +{ + double a[] = { -1.0, 3.0, -4.5 }; + double scale = -0.5; + double expected[] = { 0.5, -1.5, 2.25 }; + v_scale(a, scale, 3); + COMPARE_N(a, expected, 3); } +BOOST_AUTO_TEST_CASE(multiply) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double expected[] = { -1.0, 6.0, -13.5 }; + v_multiply(a, b, 3); + COMPARE_N(a, expected, 3); } +BOOST_AUTO_TEST_CASE(multiply_to) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double o[3]; + double expected[] = { -1.0, 6.0, -13.5 }; + v_multiply_to(o, a, b, 3); + COMPARE_N(o, expected, 3); +} + +BOOST_AUTO_TEST_CASE(multiply_and_add) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double c[] = { 3.0, -1.0, 4.0 }; + double expected[] = { 2.0, 5.0, -9.5 }; + v_multiply_and_add(c, a, b, 3); + COMPARE_N(c, expected, 3); +} + +BOOST_AUTO_TEST_CASE(divide) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { -1.0, 3.0, -4.5 }; + double expected[] = { -1.0, 2.0/3.0, 3.0/-4.5 }; + v_divide(a, b, 3); + COMPARE_N(a, expected, 3); +} + +BOOST_AUTO_TEST_CASE(sum) +{ + double a[] = { 1.0, 2.0, -3.5 }; + double s = v_sum(a, 3); + BOOST_CHECK_EQUAL(s, -0.5); +} + +BOOST_AUTO_TEST_CASE(multiply_and_sum) +{ + double a[] = { 2.0, 0.0, -1.5 }; + double b[] = { 3.0, 4.0, 5.0 }; + double s = v_multiply_and_sum(a, b, 3); + BOOST_CHECK_EQUAL(s, -1.5); +} + +BOOST_AUTO_TEST_CASE(log) +{ + double a[] = { 1.0, 1.0 / M_E, M_E }; + double expected[] = { 0.0, -1.0, 1.0 }; + v_log(a, 3); + COMPARE_N(a, expected, 3); +} + +BOOST_AUTO_TEST_CASE(exp) +{ + double a[] = { 0.0, -1.0, 2.0 }; + double expected[] = { 1.0, 1.0 / M_E, M_E * M_E }; + v_exp(a, 3); + COMPARE_N(a, expected, 3); +} + +BOOST_AUTO_TEST_CASE(sqrt) +{ + double a[] = { 0.0, 1.0, 4.0 }; + double expected[] = { 0.0, 1.0, 2.0 }; + v_sqrt(a, 3); + COMPARE_N(a, expected, 3); +} + +BOOST_AUTO_TEST_CASE(square) +{ + double a[] = { 0.0, 1.5, -2.0 }; + double expected[] = { 0.0, 2.25, 4.0 }; + v_square(a, 3); + COMPARE_N(a, expected, 3); +} + +BOOST_AUTO_TEST_CASE(abs) +{ + double a[] = { -1.9, 0.0, 0.01, -0.0 }; + double expected[] = { 1.9, 0.0, 0.01, 0.0 }; + v_abs(a, 4); + COMPARE_N(a, expected, 4); +} + +BOOST_AUTO_TEST_CASE(mean) +{ + double a[] = { -1.0, 1.6, 3.0 }; + double s = v_mean(a, 3); + BOOST_CHECK_EQUAL(s, 1.2); +} + +BOOST_AUTO_TEST_CASE(interleave_1) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double *ch[] = { a }; + double o[3]; + double expected[] = { 1.0, 2.0, 3.0 }; + v_interleave(o, ch, 1, 3); + COMPARE_N(o, expected, 3); +} + +BOOST_AUTO_TEST_CASE(interleave_2) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { 4.0, 5.0, 6.0 }; + double *ch[] = { a, b }; + double o[6]; + double expected[] = { 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 }; + v_interleave(o, ch, 2, 3); + COMPARE_N(o, expected, 6); +} + +BOOST_AUTO_TEST_CASE(interleave_3) +{ + double a[] = { 1.0, 2.0 }; + double b[] = { 3.0, 4.0 }; + double c[] = { 5.0, 6.0 }; + double *ch[] = { a, b, c }; + double o[6]; + double expected[] = { 1.0, 3.0, 5.0, 2.0, 4.0, 6.0 }; + v_interleave(o, ch, 3, 2); + COMPARE_N(o, expected, 6); +} + +BOOST_AUTO_TEST_CASE(deinterleave_1) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double o[3]; + double *oo[] = { o }; + double *expected[] = { a }; + v_deinterleave(oo, a, 1, 3); + COMPARE_N(oo[0], expected[0], 3); +} + +BOOST_AUTO_TEST_CASE(deinterleave_2) +{ + double a[] = { 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 }; + double o1[3], o2[3]; + double *oo[] = { o1, o2 }; + double e1[] = { 1.0, 2.0, 3.0 }, e2[] = { 4.0, 5.0, 6.0 }; + double *expected[] = { e1, e2 }; + v_deinterleave(oo, a, 2, 3); + COMPARE_N(oo[0], expected[0], 3); + COMPARE_N(oo[1], expected[1], 3); +} + +BOOST_AUTO_TEST_CASE(deinterleave_3) +{ + double a[] = { 1.0, 3.0, 5.0, 2.0, 4.0, 6.0 }; + double o1[2], o2[2], o3[2]; + double *oo[] = { o1, o2, o3 }; + double e1[] = { 1.0, 2.0 }, e2[] = { 3.0, 4.0 }, e3[] = { 5.0, 6.0 }; + double *expected[] = { e1, e2, e3 }; + v_deinterleave(oo, a, 3, 2); + COMPARE_N(oo[0], expected[0], 2); + COMPARE_N(oo[1], expected[1], 2); + COMPARE_N(oo[2], expected[2], 2); +} + +BOOST_AUTO_TEST_CASE(mix_1) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double *ch[] = { a }; + double o[3]; + double expected[] = { 1.0, 2.0, 3.0 }; + v_mix(o, ch, 1, 3); + COMPARE_N(o, expected, 3); +} + +BOOST_AUTO_TEST_CASE(mix_2) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double b[] = { 4.0, 5.0, 6.0 }; + double *ch[] = { a, b }; + double o[6]; + double expected[] = { 2.5, 3.5, 4.5 }; + v_mix(o, ch, 2, 3); + COMPARE_N(o, expected, 3); +} + +BOOST_AUTO_TEST_CASE(mix_3) +{ + double a[] = { 1.0, 2.0 }; + double b[] = { 3.0, 4.0 }; + double c[] = { 5.0, 6.0 }; + double *ch[] = { a, b, c }; + double o[6]; + double expected[] = { 3.0, 4.0 }; + v_mix(o, ch, 3, 2); + COMPARE_N(o, expected, 2); +} + +BOOST_AUTO_TEST_CASE(reconfigure_1_2) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double *aa[] = { a }; + double o1[3], o2[3]; + double *oo[] = { o1, o2 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 1.0, 2.0, 3.0 }; + double *expected[] = { e1, e2 }; + v_reconfigure_channels(oo, 2, aa, 1, 3); + COMPARE_N(oo[0], expected[0], 3); + COMPARE_N(oo[1], expected[1], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_2_1) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double *aa[] = { a1, a2 }; + double o1[3]; + double *oo[] = { o1 }; + double e1[] = { 2.5, 3.5, 4.5 }; + double *expected[] = { e1 }; + v_reconfigure_channels(oo, 1, aa, 2, 3); + COMPARE_N(oo[0], expected[0], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_3_1) +{ + double a1[] = { 1.0, 2.0 }; + double a2[] = { 3.0, 4.0 }; + double a3[] = { 5.0, 6.0 }; + double *aa[] = { a1, a2, a3 }; + double o1[2]; + double *oo[] = { o1 }; + double e1[] = { 3.0, 4.0 }; + double *expected[] = { e1 }; + v_reconfigure_channels(oo, 1, aa, 3, 2); + COMPARE_N(oo[0], expected[0], 2); +} + +BOOST_AUTO_TEST_CASE(reconfigure_1_3) +{ + double a[] = { 1.0, 2.0, 3.0 }; + double *aa[] = { a }; + double o1[3], o2[3], o3[3]; + double *oo[] = { o1, o2, o3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 1.0, 2.0, 3.0 }; + double e3[] = { 1.0, 2.0, 3.0 }; + double *expected[] = { e1, e2, e3 }; + v_reconfigure_channels(oo, 3, aa, 1, 3); + COMPARE_N(oo[0], expected[0], 3); + COMPARE_N(oo[1], expected[1], 3); + COMPARE_N(oo[2], expected[2], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_2_3) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double *aa[] = { a1, a2 }; + double o1[3], o2[3], o3[3]; + double *oo[] = { o1, o2, o3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 0.0, 0.0, 0.0 }; + double e3[] = { 4.0, 5.0, 6.0 }; + double *expected[] = { e1, e2, e3 }; + v_reconfigure_channels(oo, 3, aa, 2, 3); + COMPARE_N(oo[0], expected[0], 3); + COMPARE_N(oo[1], expected[1], 3); + COMPARE_N(oo[2], expected[2], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_3_2) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double a3[] = { 7.0, 8.0, 9.0 }; + double *aa[] = { a1, a2, a3 }; + double o1[3], o2[3]; + double *oo[] = { o1, o2 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 7.0, 8.0, 9.0 }; + double *expected[] = { e1, e2 }; + v_reconfigure_channels(oo, 2, aa, 3, 3); + COMPARE_N(oo[0], expected[0], 3); + COMPARE_N(oo[1], expected[1], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_3_3) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double a3[] = { 7.0, 8.0, 9.0 }; + double *aa[] = { a1, a2, a3 }; + double o1[3], o2[3], o3[3]; + double *oo[] = { o1, o2, o3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 4.0, 5.0, 6.0 }; + double e3[] = { 7.0, 8.0, 9.0 }; + double *expected[] = { e1, e2, e3 }; + v_reconfigure_channels(oo, 3, aa, 3, 3); + COMPARE_N(oo[0], expected[0], 3); + COMPARE_N(oo[1], expected[1], 3); + COMPARE_N(oo[2], expected[2], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_1_2_inplace) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[3]; + double *aa[] = { a1, a2 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 1.0, 2.0, 3.0 }; + double *expected[] = { e1, e2 }; + v_reconfigure_channels_inplace(aa, 2, 1, 3); + COMPARE_N(aa[0], expected[0], 3); + COMPARE_N(aa[1], expected[1], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_2_1_inplace) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double *aa[] = { a1, a2 }; + double e1[] = { 2.5, 3.5, 4.5 }; + double *expected[] = { e1 }; + v_reconfigure_channels_inplace(aa, 1, 2, 3); + COMPARE_N(aa[0], expected[0], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_3_1_inplace) +{ + double a1[] = { 1.0, 2.0 }; + double a2[] = { 3.0, 4.0 }; + double a3[] = { 5.0, 6.0 }; + double *aa[] = { a1, a2, a3 }; + double e1[] = { 3.0, 4.0 }; + double *expected[] = { e1 }; + v_reconfigure_channels_inplace(aa, 1, 3, 2); + COMPARE_N(aa[0], expected[0], 2); +} + +BOOST_AUTO_TEST_CASE(reconfigure_1_3_inplace) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[3], a3[3]; + double *aa[] = { a1, a2, a3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 1.0, 2.0, 3.0 }; + double e3[] = { 1.0, 2.0, 3.0 }; + double *expected[] = { e1, e2, e3 }; + v_reconfigure_channels_inplace(aa, 3, 1, 3); + COMPARE_N(aa[0], expected[0], 3); + COMPARE_N(aa[1], expected[1], 3); + COMPARE_N(aa[2], expected[2], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_2_3_inplace) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double a3[3]; + double *aa[] = { a1, a2, a3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 4.0, 5.0, 6.0 }; + double e3[] = { 0.0, 0.0, 0.0 }; + double *expected[] = { e1, e2, e3 }; + v_reconfigure_channels_inplace(aa, 3, 2, 3); + COMPARE_N(aa[0], expected[0], 3); + COMPARE_N(aa[1], expected[1], 3); + COMPARE_N(aa[2], expected[2], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_3_2_inplace) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double a3[] = { 7.0, 8.0, 9.0 }; + double *aa[] = { a1, a2, a3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 4.0, 5.0, 6.0 }; + double *expected[] = { e1, e2 }; + v_reconfigure_channels_inplace(aa, 2, 3, 3); + COMPARE_N(aa[0], expected[0], 3); + COMPARE_N(aa[1], expected[1], 3); +} + +BOOST_AUTO_TEST_CASE(reconfigure_3_3_inplace) +{ + double a1[] = { 1.0, 2.0, 3.0 }; + double a2[] = { 4.0, 5.0, 6.0 }; + double a3[] = { 7.0, 8.0, 9.0 }; + double *aa[] = { a1, a2, a3 }; + double e1[] = { 1.0, 2.0, 3.0 }; + double e2[] = { 4.0, 5.0, 6.0 }; + double e3[] = { 7.0, 8.0, 9.0 }; + double *expected[] = { e1, e2, e3 }; + v_reconfigure_channels_inplace(aa, 3, 3, 3); + COMPARE_N(aa[0], expected[0], 3); + COMPARE_N(aa[1], expected[1], 3); + COMPARE_N(aa[2], expected[2], 3); +} + +BOOST_AUTO_TEST_CASE(fftshift) +{ + double a[] = { 0.1, 2.0, -0.3, 4.0 }; + double e[] = { -0.3, 4.0, 0.1, 2.0 }; + v_fftshift(a, 4); + COMPARE_N(a, e, 4); +} + +BOOST_AUTO_TEST_SUITE_END() +
--- a/bqvec/test/TestVectorOps.h Sat Nov 12 09:59:34 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -namespace breakfastquay { - -namespace Test { - -bool testVectorOps(); - -} - -} -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/test/TestVectorOpsComplex.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,213 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "bqvec/VectorOpsComplex.h" +#include "bqvec/VectorOps.h" + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MAIN + +#include <boost/test/unit_test.hpp> + +#include <stdexcept> +#include <vector> +#include <iostream> + +using namespace breakfastquay; + +using namespace std; + +BOOST_AUTO_TEST_SUITE(TestVectorOpsComplex) + +#ifdef USE_APPROXIMATE_ATAN2 +static const double eps = 5.0e-3; +#else +#ifdef USE_SINGLE_PRECISION_COMPLEX +static const double eps = 1.0e-7; +#else +static const double eps = 1.0e-14; +#endif +#endif + +#define COMPARE_N(a, b, n) \ + for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], eps); \ + } + +#define COMPARE_NC(a, b, n) \ + for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], (bq_complex_element_t) eps); \ + } + +#define COMPARE_CPLX_N(a, b, n) \ + for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ + BOOST_CHECK_SMALL(a[cmp_i].re - b[cmp_i].re, (bq_complex_element_t) eps); \ + BOOST_CHECK_SMALL(a[cmp_i].im - b[cmp_i].im, (bq_complex_element_t) eps); \ + } + +BOOST_AUTO_TEST_CASE(add) +{ + bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } }; + bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } }; + bq_complex_t expected[] = { { 0.0, 5.0 }, { -1.5, -4.0 } }; + v_add(a, b, 2); + COMPARE_CPLX_N(a, expected, 2); +} + +BOOST_AUTO_TEST_CASE(add_with_gain) +{ + bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } }; + bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } }; + bq_complex_t expected[] = { { -0.5, 6.5 }, { -3.75, -4.0 } }; + v_add_with_gain(a, b, (bq_complex_element_t) 1.5, 2); + COMPARE_CPLX_N(a, expected, 2); +} + +BOOST_AUTO_TEST_CASE(multiply) +{ + bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } }; + bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } }; + bq_complex_t expected[] = { { -7.0, 1.0 }, { -13.5, 18.0 } }; + v_multiply(a, b, 2); + COMPARE_CPLX_N(a, expected, 2); +} + +BOOST_AUTO_TEST_CASE(multiply_to) +{ + bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } }; + bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } }; + bq_complex_t o[2]; + bq_complex_t expected[] = { { -7.0, 1.0 }, { -13.5, 18.0 } }; + v_multiply_to(o, a, b, 2); + COMPARE_CPLX_N(o, expected, 2); +} + +BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes_bq) +{ + bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } }; + bq_complex_element_t o[2]; + bq_complex_element_t expected[] = { sqrt(5.0), 5.0 }; + v_cartesian_to_magnitudes(o, a, 2); + COMPARE_NC(o, expected, 2); +} + +BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes) +{ + double re[] = { 1.0, 3.0 }; + double im[] = { 2.0, -4.0 }; + double o[2]; + double expected[] = { sqrt(5.0), 5.0 }; + v_cartesian_to_magnitudes(o, re, im, 2); + COMPARE_N(o, expected, 2); +} + +BOOST_AUTO_TEST_CASE(cartesian_interleaved_to_magnitudes) +{ + double a[] = { 1.0, 2.0, 3.0, -4.0 }; + double o[2]; + double expected[] = { sqrt(5.0), 5.0 }; + v_cartesian_interleaved_to_magnitudes(o, a, 2); + COMPARE_N(o, expected, 2); +} + +BOOST_AUTO_TEST_CASE(cartesian_to_polar_bq) +{ + bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } }; + bq_complex_element_t mo[3], po[3]; + bq_complex_element_t me[] = { 0.0, sqrt(2.0), 1.0 }; + bq_complex_element_t pe[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 }; + v_cartesian_to_polar(mo, po, a, 3); + COMPARE_NC(mo, me, 3); + COMPARE_NC(po, pe, 3); +} + +BOOST_AUTO_TEST_CASE(cartesian_to_polar_interleaved_bq) +{ + bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } }; + bq_complex_element_t o[6]; + bq_complex_element_t e[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 }; + v_cartesian_to_polar_interleaved(o, a, 3); + COMPARE_NC(o, e, 6); +} + +BOOST_AUTO_TEST_CASE(cartesian_to_polar) +{ + double re[] = { 0.0, 1.0, 0.0 }; + double im[] = { 0.0, 1.0, -1.0 }; + double mo[3], po[3]; + double me[] = { 0.0, sqrt(2.0), 1.0 }; + double pe[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 }; + v_cartesian_to_polar(mo, po, re, im, 3); + COMPARE_N(mo, me, 3); + COMPARE_N(po, pe, 3); +} + +BOOST_AUTO_TEST_CASE(cartesian_to_polar_interleaved_inplace) +{ + double a[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 }; + double e[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 }; + v_cartesian_to_polar_interleaved_inplace(a, 3); + COMPARE_N(a, e, 6); +} + +BOOST_AUTO_TEST_CASE(cartesian_interleaved_to_polar) +{ + double a[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 }; + double mo[3], po[3]; + double me[] = { 0.0, sqrt(2.0), 1.0 }; + double pe[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 }; + v_cartesian_interleaved_to_polar(mo, po, a, 3); + COMPARE_N(mo, me, 3); + COMPARE_N(po, pe, 3); +} + +BOOST_AUTO_TEST_CASE(polar_to_cartesian_bq) +{ + bq_complex_element_t m[] = { 0.0, sqrt(2.0), 1.0 }; + bq_complex_element_t p[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 }; + bq_complex_t o[3]; + bq_complex_t e[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } }; + v_polar_to_cartesian(o, m, p, 3); + COMPARE_CPLX_N(o, e, 3); +} + +BOOST_AUTO_TEST_CASE(polar_to_cartesian_interleaved_bq) +{ + bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } }; + bq_complex_element_t o[6]; + bq_complex_element_t e[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 }; + v_cartesian_to_polar_interleaved(o, a, 3); + COMPARE_NC(o, e, 6); +} + +BOOST_AUTO_TEST_CASE(polar_to_cartesian) +{ + double m[] = { 0.0, sqrt(2.0), 1.0 }; + double p[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 }; + double ro[3], io[3]; + double re[] = { 0.0, 1.0, 0.0 }; + double ie[] = { 0.0, 1.0, -1.0 }; + v_polar_to_cartesian(ro, io, m, p, 3); + COMPARE_N(ro, re, 3); + COMPARE_N(io, ie, 3); +} + +BOOST_AUTO_TEST_CASE(polar_to_cartesian_interleaved_inplace) +{ + double a[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 }; + double e[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 }; + v_polar_interleaved_to_cartesian_inplace(a, 3); + COMPARE_N(a, e, 6); +} + +BOOST_AUTO_TEST_CASE(polar_to_cartesian_interleaved) +{ + double m[] = { 0.0, sqrt(2.0), 1.0 }; + double p[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 }; + double o[6]; + double e[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 }; + v_polar_to_cartesian_interleaved(o, m, p, 3); + COMPARE_N(o, e, 6); +} + +BOOST_AUTO_TEST_SUITE_END() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bqvec/test/Timings.cpp Tue Nov 19 10:13:32 2019 +0000 @@ -0,0 +1,323 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "bqvec/VectorOpsComplex.h" + +#include <iostream> +#include <cstdlib> + +#include <time.h> + +using namespace std; +using namespace breakfastquay; + +//!!! This is nonsense. TODO: Replace it with sense. + +#ifdef _WIN32 +#define drand48() (-1+2*((float)rand())/RAND_MAX) +#endif + +bool +testMultiply() +{ + cerr << "testVectorOps: testing v_multiply complex" << endl; + + const int N = 1024; + //!!! todo: use aligned allocate(), otherwise results will vary randomly + bq_complex_t target[N]; + bq_complex_t src1[N]; + bq_complex_t src2[N]; + + for (int i = 0; i < N; ++i) { + src1[i].re = drand48(); + src1[i].im = drand48(); + src2[i].re = drand48(); + src2[i].im = drand48(); + } + + double mean, first, last, total = 0; + for (int i = 0; i < N; ++i) { + bq_complex_t result; + c_multiply(result, src1[i], src2[i]); + if (i == 0) first = result.re; + if (i == N-1) last = result.im; + total += result.re; + total += result.im; + } + mean = total / (N*2); + cerr << "Naive method: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + v_multiply_to(target, src1, src2, N); + total = 0; + + for (int i = 0; i < N; ++i) { + if (i == 0) first = target[i].re; + if (i == N-1) last = target[i].im; + total += target[i].re; + total += target[i].im; + } + mean = total / (N*2); + cerr << "v_multiply: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + int iterations = 50000; +// cerr << "Iterations: " << iterations << endl; + +// cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; + float divisor = float(CLOCKS_PER_SEC) / 1000.f; + + clock_t start = clock(); + + for (int j = 0; j < iterations; ++j) { + for (int i = 0; i < N; ++i) { + c_multiply(target[i], src1[i], src2[i]); + } + } + + clock_t end = clock(); + + cerr << "Time for naive method: " << float(end - start)/divisor << endl; + + start = clock(); + + for (int j = 0; j < iterations; ++j) { + v_multiply_to(target, src1, src2, N); + } + + end = clock(); + + cerr << "Time for v_multiply: " << float(end - start)/divisor << endl; + + return true; +} + +bool +testPolarToCart() +{ + cerr << "testVectorOps: testing v_polar_to_cartesian" << endl; + + const int N = 1024; + bq_complex_t target[N]; + bq_complex_element_t mag[N]; + bq_complex_element_t phase[N]; + + for (int i = 0; i < N; ++i) { + mag[i] = drand48(); + phase[i] = (drand48() * M_PI * 2) - M_PI; + } + + double mean, first, last, total = 0; + for (int i = 0; i < N; ++i) { + double real = mag[i] * cos(phase[i]); + double imag = mag[i] * sin(phase[i]); + if (i == 0) first = real; + if (i == N-1) last = imag; + total += real; + total += imag; + } + mean = total / (N*2); + cerr << "Naive method: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + v_polar_to_cartesian(target, mag, phase, N); + + total = 0; + + for (int i = 0; i < N; ++i) { + if (i == 0) first = target[i].re; + if (i == N-1) last = target[i].im; + total += target[i].re; + total += target[i].im; + } + mean = total / (N*2); + cerr << "v_polar_to_cartesian: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + int iterations = 10000; +// cerr << "Iterations: " << iterations << endl; + +// cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; + float divisor = float(CLOCKS_PER_SEC) / 1000.f; + + clock_t start = clock(); + + for (int j = 0; j < iterations; ++j) { + for (int i = 0; i < N; ++i) { + target[i].re = mag[i] * cos(phase[i]); + target[i].im = mag[i] * sin(phase[i]); + } + } + + clock_t end = clock(); + + cerr << "Time for naive method: " << float(end - start)/divisor << endl; + + start = clock(); + + for (int j = 0; j < iterations; ++j) { + v_polar_to_cartesian(target, mag, phase, N); + } + + end = clock(); + + cerr << "Time for v_polar_to_cartesian: " << float(end - start)/divisor << endl; + + return true; +} + +bool +testPolarToCartInterleaved() +{ + cerr << "testVectorOps: testing v_polar_interleaved_to_cartesian" << endl; + + const int N = 1024; + bq_complex_t target[N]; + bq_complex_element_t source[N*2]; + + for (int i = 0; i < N; ++i) { + source[i*2] = drand48(); + source[i*2+1] = (drand48() * M_PI * 2) - M_PI; + } + + double mean, first, last, total = 0; + for (int i = 0; i < N; ++i) { + double real = source[i*2] * cos(source[i*2+1]); + double imag = source[i*2] * sin(source[i*2+1]); + if (i == 0) first = real; + if (i == N-1) last = imag; + total += real; + total += imag; + } + mean = total / (N*2); + cerr << "Naive method: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + v_polar_interleaved_to_cartesian(target, source, N); + + total = 0; + + for (int i = 0; i < N; ++i) { + if (i == 0) first = target[i].re; + if (i == N-1) last = target[i].im; + total += target[i].re; + total += target[i].im; + } + mean = total / (N*2); + cerr << "v_polar_interleaved_to_cartesian: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + int iterations = 10000; +// cerr << "Iterations: " << iterations << endl; + +// cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; + float divisor = float(CLOCKS_PER_SEC) / 1000.f; + + clock_t start = clock(); + + for (int j = 0; j < iterations; ++j) { + for (int i = 0; i < N; ++i) { + target[i].re = source[i*2] * cos(source[i*2+1]); + target[i].im = source[i*2] * sin(source[i*2+1]); + } + } + + clock_t end = clock(); + + cerr << "Time for naive method: " << float(end - start)/divisor << endl; + + start = clock(); + + for (int j = 0; j < iterations; ++j) { + v_polar_interleaved_to_cartesian(target, source, N); + } + + end = clock(); + + cerr << "Time for v_polar_interleaved_to_cartesian: " << float(end - start)/divisor << endl; + + return true; +} + +bool +testCartToPolar() +{ + cerr << "testVectorOps: testing v_cartesian_to_polar" << endl; + + const int N = 1024; + bq_complex_t source[N]; + bq_complex_element_t mag[N]; + bq_complex_element_t phase[N]; + + for (int i = 0; i < N; ++i) { + source[i].re = (drand48() * 2.0) - 1.0; + source[i].im = (drand48() * 2.0) - 1.0; + } + + double mean, first, last, total = 0; + for (int i = 0; i < N; ++i) { + double mag = sqrt(source[i].re * source[i].re + source[i].im * source[i].im); + double phase = atan2(source[i].im, source[i].re); + if (i == 0) first = mag; + if (i == N-1) last = phase; + total += mag; + total += phase; + } + mean = total / (N*2); + cerr << "Naive method: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + v_cartesian_to_polar(mag, phase, source, N); + + total = 0; + + for (int i = 0; i < N; ++i) { + if (i == 0) first = mag[i]; + if (i == N-1) last = phase[i]; + total += mag[i]; + total += phase[i]; + } + mean = total / (N*2); + cerr << "v_cartesian_to_polar: mean = " << mean << ", first = " << first + << ", last = " << last << endl; + + int iterations = 10000; +// cerr << "Iterations: " << iterations << endl; + +// cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl; + float divisor = float(CLOCKS_PER_SEC) / 1000.f; + + clock_t start = clock(); + + for (int j = 0; j < iterations; ++j) { + for (int i = 0; i < N; ++i) { + mag[i] = sqrt(source[i].re * source[i].re + source[i].im * source[i].im); + phase[i] = atan2(source[i].im, source[i].re); + } + } + + clock_t end = clock(); + + cerr << "Time for naive method: " << float(end - start)/divisor << endl; + + start = clock(); + + for (int j = 0; j < iterations; ++j) { + v_cartesian_to_polar(mag, phase, source, N); + } + + end = clock(); + + cerr << "Time for v_cartesian_to_polar: " << float(end - start)/divisor << endl; + + return true; +} + +int main(int, char **) +{ + if (!testMultiply()) return 1; + if (!testPolarToCart()) return 1; + if (!testPolarToCartInterleaved()) return 1; + if (!testCartToPolar()) return 1; + return 0; +} +