changeset 372:af71cbdab621 tip

Update bqvec code
author Chris Cannam
date Tue, 19 Nov 2019 10:13:32 +0000
parents 426ce52ef096
children
files bqvec/COPYING bqvec/Makefile bqvec/README.md bqvec/README.txt bqvec/bqvec/Allocators.h bqvec/bqvec/Barrier.h bqvec/bqvec/ComplexTypes.h bqvec/bqvec/Range.h bqvec/bqvec/Restrict.h bqvec/bqvec/RingBuffer.h bqvec/bqvec/VectorOps.h bqvec/bqvec/VectorOpsComplex.h bqvec/build/Makefile.inc bqvec/build/Makefile.linux bqvec/build/Makefile.linux.ipp bqvec/build/Makefile.linux.min bqvec/build/Makefile.osx bqvec/build/run-platform-tests.sh bqvec/pommier/sse_mathfun.h bqvec/src/Allocators.cpp bqvec/src/Barrier.cpp bqvec/src/VectorOpsComplex.cpp bqvec/test/TestAllocators.cpp bqvec/test/TestVectorOps.cpp bqvec/test/TestVectorOps.h bqvec/test/TestVectorOpsComplex.cpp bqvec/test/Timings.cpp
diffstat 27 files changed, 2981 insertions(+), 632 deletions(-)
--- a/bqvec/COPYING	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/COPYING	Tue Nov 19 10:13:32 2019 +0000
@@ -1,7 +1,7 @@
 
 The following terms apply to the code in the src/ directory:
 
-    Copyright 2007-2014 Particular Programs Ltd.
+    Copyright 2007-2017 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
--- a/bqvec/Makefile	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/Makefile	Tue Nov 19 10:13:32 2019 +0000
@@ -7,9 +7,24 @@
 #  -DHAVE_IPP    Intel's Integrated Performance Primitives are available
 #  -DHAVE_VDSP   Apple's Accelerate framework is available
 #
-# These are optional (they affect performance, not function) and you
-# may define more than one of them.
-# 
+# The above are optional (they affect performance, not function) and
+# you may define more than one of them.
+#
+# The following two options trade off speed against precision for single-
+# precision paths in cases where IPP and VDSP are not available:
+#
+#  -DUSE_POMMIER_MATHFUN Use Julien Pommier's SSE/NEON implementation
+#   of sincos in 32-bit polar-to-cartesian conversion
+#  -DUSE_APPROXIMATE_ATAN2 Use a quick but *very* approximate atan2
+#   function in 32-bit cartesian-to-polar conversion
+#
+# And a handful of miscellaneous flags:
+#
+#  -DLACK_SINCOS  Math library lacks sincos() function
+#  -DNO_COMPLEX_TYPES  Don't bother defining bq_complex_t functions
+#  -DUSE_SINGLE_PRECISION_COMPLEX  Use float, not double, for bq_complex_t
+#  -DNO_EXCEPTIONS  Don't throw exceptions (abort instead)
+#
 # Add any relevant -I flags for include paths as well.
 #
 # Note that you must supply the same flags when including bqvec
@@ -17,72 +32,57 @@
 # may find it simplest to just add the bqvec source files to your
 # application's build system and not build a bqvec library at all.)
 
-VECTOR_DEFINES	:= 
+VECTOR_DEFINES		:=
 
 
 # Add to ALLOCATOR_DEFINES options relating to aligned malloc.
+# These are not usually necessary.
 #
 # Available options are
 #
-#  -DHAVE_POSIX_MEMALIGN     The posix_memalign call is available in sys/mman.h
-#  -DLACK_POSIX_MEMALIGN     The posix_memalign call is not available
+#  -DHAVE_POSIX_MEMALIGN       The posix_memalign call is available in sys/mman.h
+#  -DLACK_POSIX_MEMALIGN       The posix_memalign call is not available
 #
-#  -DMALLOC_IS_ALIGNED       The malloc call already returns aligned memory
-#  -DMALLOC_IS_NOT_ALIGNED   The malloc call does not return aligned memory
+#  -DMALLOC_IS_ALIGNED         The malloc call already returns aligned memory
+#  -DMALLOC_IS_NOT_ALIGNED     The malloc call does not return aligned memory
 #
-#  -DUSE_OWN_ALIGNED_MALLOC  No aligned malloc is available, roll your own
+#  -DUSE_OWN_ALIGNED_MALLOC    If no aligned malloc is available, roll your own
+#  -DAVOID_OWN_ALIGNED_MALLOC  If no aligned malloc is available, refuse to build
 #
-#  -DLACK_BAD_ALLOC          The C++ library lacks the std::bad_alloc exception
+#  -DLACK_BAD_ALLOC            The C++ library lacks the std::bad_alloc exception
 #
 # Here "aligned" is assumed to mean "aligned enough for whatever
-# vector stuff the space will be used for" which most likely means
+# vector stuff the space will be used for" which likely means at least
 # 16-byte alignment.
 #
-# The default is to use _aligned_malloc when building with Visual C++,
-# system malloc when building on OS/X, and posix_memalign otherwise.
+# If no options are provided, we will use IPP functions if HAVE_IPP is
+# defined, or else use _aligned_malloc when building with Visual C++
+# on Windows, roll our own when building with some other compiler on
+# Windows, use system malloc when building on OS/X, and use
+# posix_memalign elsewhere.
 #
 # Note that you must supply the same flags when including bqvec
 # headers later as you are using now when compiling the library. (You
 # may find it simplest to just add the bqvec source files to your
 # application's build system and not build a bqvec library at all.)
 
-ALLOCATOR_DEFINES := 
+ALLOCATOR_DEFINES 	:= 
 
 
-SRC_DIR	:= src
-HEADER_DIR := bqvec
+# Add any related includes and libraries here
+#
+THIRD_PARTY_INCLUDES	:=
+THIRD_PARTY_LIBS	:=
 
-SOURCES	:= $(wildcard $(SRC_DIR)/*.cpp)
-HEADERS	:= $(wildcard $(HEADER_DIR)/*.h) $(wildcard $(SRC_DIR)/*.h)
 
-OBJECTS	:= $(SOURCES:.cpp=.o)
-OBJECTS	:= $(OBJECTS:.c=.o)
+# If you are including a set of bq libraries into a project, you can
+# override variables for all of them (including all of the above) in
+# the following file, which all bq* Makefiles will include if found
 
-CXXFLAGS := $(VECTOR_DEFINES) $(ALLOCATOR_DEFINES) -I.
+-include ../Makefile.inc-bq
 
-LIBRARY	:= libbqvec.a
 
-all:	$(LIBRARY)
+# This project-local Makefile describes the source files and contains
+# no routinely user-modifiable parts
 
-$(LIBRARY):	$(OBJECTS)
-	$(AR) rc $@ $^
-
-clean:		
-	rm -f $(OBJECTS)
-
-distclean:	clean
-	rm -f $(LIBRARY)
-
-depend:
-	makedepend -Y -fMakefile $(SOURCES) $(HEADERS)
-
-
-# DO NOT DELETE
-
-src/VectorOpsComplex.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h
-src/VectorOpsComplex.o: bqvec/Restrict.h bqvec/ComplexTypes.h
-src/Allocators.o: bqvec/Allocators.h bqvec/VectorOps.h bqvec/Restrict.h
-bqvec/VectorOpsComplex.o: bqvec/VectorOps.h bqvec/Restrict.h
-bqvec/VectorOpsComplex.o: bqvec/ComplexTypes.h
-bqvec/VectorOps.o: bqvec/Restrict.h
-bqvec/Allocators.o: bqvec/VectorOps.h bqvec/Restrict.h
+include build/Makefile.inc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/README.md	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,36 @@
+
+bqvec
+=====
+
+A small library for vector management and arithmetic in C++ using raw
+C pointer arrays, designed for simple audio buffer-shuffling. Also
+includes aligned malloc wrappers and a lock-free ring buffer.
+
+The code can call out to vector arithmetic helpers (IPP, vDSP) in some
+places, and its loops are written with an eye to auto-vectorising
+compilers, but this is mostly a convenience library rather than a
+performance library -- it exists primarily to give a fairly
+consistent API to useful functions over audio buffer arrays.
+
+This code originated as part of the Rubber Band Library written by the
+same authors (see https://hg.sr.ht/~breakfastquay/rubberband/).
+It has been pulled out into a separate library and relicensed under a
+more permissive licence.
+
+Generally expected to be vendored into local project builds rather
+than installed as a system library.
+
+C++ standard required: C++98 (does not use C++11 or newer features)
+
+ * To compile on Linux: make
+
+ * To compile on macOS: make -f build/Makefile.osx
+
+ * To build and run tests: as above, but add the "test" target -
+   requires the Boost test headers to be installed
+
+[![Build Status](https://travis-ci.org/breakfastquay/bqvec.svg?branch=master)](https://travis-ci.org/breakfastquay/bqvec)
+
+Copyright 2007-2018 Particular Programs Ltd. See the file COPYING for
+(BSD/MIT-style) licence terms.
+
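[Editor's note: as a quick orientation for the library the README describes, a minimal usage sketch -- the function name and buffer sizes are illustrative, not part of the change:]

    #include "bqvec/VectorOps.h"

    // Mix "in" into "out" at half gain; both buffers hold n samples.
    void mix_half(float *const BQ_R__ out,
                  const float *const BQ_R__ in,
                  int n)
    {
        breakfastquay::v_add_with_gain(out, in, 0.5f, n);
    }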
--- a/bqvec/README.txt	Sat Nov 12 09:59:34 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-
-bqvec
-=====
-
-A small library for efficient vector arithmetic and allocation in C++
-using raw C pointer arrays.
-
-Copyright 2007-2014 Particular Programs Ltd.
-
-This code originated as part of the Rubber Band Library written by the
-same authors (see https://bitbucket.org/breakfastquay/rubberband/).
-It has been pulled out into a separate library and relicensed under a
-more permissive licence: a BSD/MIT-style licence, as opposed to the
-GPL used for Rubber Band.  See the file COPYING for details.
-
--- a/bqvec/bqvec/Allocators.h	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/bqvec/Allocators.h	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2015 Particular Programs Ltd.
+    Copyright 2007-2017 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -36,35 +36,65 @@
 #ifndef BQVEC_ALLOCATORS_H
 #define BQVEC_ALLOCATORS_H
 
-#include "VectorOps.h"
+/*
+ * Aligned and per-channel allocators and deallocators for raw C array
+ * buffers and C++ STL vectors.
+ */
 
-#include <new> // for std::bad_alloc
+#include <new>
 #include <stdlib.h>
+#include <stddef.h>
+#include <stdexcept>
 
 #ifndef HAVE_POSIX_MEMALIGN
+#ifndef LACK_POSIX_MEMALIGN
 #ifndef _WIN32
 #ifndef __APPLE__
-#ifndef LACK_POSIX_MEMALIGN
 #define HAVE_POSIX_MEMALIGN
 #endif
 #endif
 #endif
 #endif
 
+#ifndef MALLOC_IS_ALIGNED
 #ifndef MALLOC_IS_NOT_ALIGNED
-#ifdef __APPLE_
+#ifdef __APPLE__
+#define MALLOC_IS_ALIGNED
+#endif
+#endif
+#endif
+
+#ifndef HAVE__ALIGNED_MALLOC
+#ifndef LACK__ALIGNED_MALLOC
+#ifdef _WIN32
+#define HAVE__ALIGNED_MALLOC
+#endif
+#endif
+#endif
+
+#ifndef USE_OWN_ALIGNED_MALLOC
+#ifndef AVOID_OWN_ALIGNED_MALLOC
+#ifndef HAVE_POSIX_MEMALIGN
 #ifndef MALLOC_IS_ALIGNED
-#define MALLOC_IS_ALIGNED
+#ifndef HAVE__ALIGNED_MALLOC
+#define USE_OWN_ALIGNED_MALLOC
+#endif
+#endif
 #endif
 #endif
 #endif
 
 #ifdef HAVE_POSIX_MEMALIGN
 #include <sys/mman.h>
+#include <errno.h>
 #endif
 
+#ifndef NO_EXCEPTIONS
 #ifdef LACK_BAD_ALLOC
-namespace std { struct bad_alloc { }; }
+namespace std {
+    struct bad_alloc { };
+}
+#endif
 #endif
 
 namespace breakfastquay {
@@ -73,9 +103,45 @@
 T *allocate(size_t count)
 {
     void *ptr = 0;
-    // 32-byte alignment is required for at least OpenMAX
+
+    // We'd like to check HAVE_IPP first and, if it's defined, call
+    // ippsMalloc_8u(count * sizeof(T)). But that isn't a general
+    // replacement for malloc() because it only takes an int
+    // argument. So we save it for the specialisations of
+    // allocate<float> and allocate<double> below, where we're more
+    // likely to get away with it.
+
+#ifdef MALLOC_IS_ALIGNED
+    ptr = malloc(count * sizeof(T));
+#else /* !MALLOC_IS_ALIGNED */
+
+    // That deals with the "sufficiently aligned" functions; the rest
+    // need a specific alignment provided to the call. 32-byte
+    // alignment is required for at least OpenMAX.
     static const int alignment = 32;
+
+#ifdef HAVE__ALIGNED_MALLOC
+    ptr = _aligned_malloc(count * sizeof(T), alignment);
+#else /* !HAVE__ALIGNED_MALLOC */
+
+#ifdef HAVE_POSIX_MEMALIGN
+    int rv = posix_memalign(&ptr, alignment, count * sizeof(T));
+    if (rv) {
+#ifndef NO_EXCEPTIONS
+        if (rv == EINVAL) {
+            throw std::logic_error("Internal error: invalid alignment");
+         } else {
+            throw std::bad_alloc();
+        }
+#else
+        abort();
+#endif
+    }
+#else /* !HAVE_POSIX_MEMALIGN */
+    
 #ifdef USE_OWN_ALIGNED_MALLOC
+#pragma message("Rolling own aligned malloc: this is unlikely to perform as well as the alternatives")
+
     // Alignment must be a power of two, bigger than the pointer
     // size. Stuff the actual malloc'd pointer in just before the
     // returned value.  This is the least desirable way to do this --
@@ -86,34 +152,32 @@
         char *adj = (char *)buf;
         while ((unsigned long long)adj & (alignment-1)) --adj;
         ptr = ((char *)adj) + alignment;
+        new (((void **)ptr) - 1) (void *);
         ((void **)ptr)[-1] = buf;
     }
+
 #else /* !USE_OWN_ALIGNED_MALLOC */
-#ifdef HAVE_POSIX_MEMALIGN
-    if (posix_memalign(&ptr, alignment, count * sizeof(T))) {
-        ptr = malloc(count * sizeof(T));
-    }
-#else /* !HAVE_POSIX_MEMALIGN */
-#ifdef __MSVC__
-    ptr = _aligned_malloc(count * sizeof(T), alignment);
-#else /* !__MSVC__ */
-#ifndef MALLOC_IS_ALIGNED
-#error "No aligned malloc available: define MALLOC_IS_ALIGNED to stick with system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
-#endif
-    // Note that malloc always aligns to 16 byte boundaries on OS/X
-    ptr = malloc(count * sizeof(T));
-    (void)alignment; // avoid compiler warning for unused 
-#endif /* !__MSVC__ */
+
+#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
+
+#endif /* !USE_OWN_ALIGNED_MALLOC */
 #endif /* !HAVE_POSIX_MEMALIGN */
-#endif /* !USE_OWN_ALIGNED_MALLOC */
+#endif /* !HAVE__ALIGNED_MALLOC */
+#endif /* !MALLOC_IS_ALIGNED */
+
     if (!ptr) {
 #ifndef NO_EXCEPTIONS
-        throw(std::bad_alloc());
+        throw std::bad_alloc();
 #else
         abort();
 #endif
     }
-    return (T *)ptr;
+
+    T *typed_ptr = static_cast<T *>(ptr);
+    for (size_t i = 0; i < count; ++i) {
+        new (typed_ptr + i) T;
+    }
+    return typed_ptr;
 }
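[Editor's note: for illustration, the allocate/deallocate pair introduced here would typically be used as below; the element count is arbitrary:]

    #include "bqvec/Allocators.h"

    void example()
    {
        // Aligned allocation; release with the matching deallocate()
        // so the correct underlying free is called.
        float *buf = breakfastquay::allocate<float>(1024);
        // ... use buf[0..1023] ...
        breakfastquay::deallocate(buf);
    }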
 
 #ifdef HAVE_IPP
@@ -130,22 +194,39 @@
 T *allocate_and_zero(size_t count)
 {
     T *ptr = allocate<T>(count);
-    v_zero(ptr, count);
+    for (size_t i = 0; i < count; ++i) {
+        ptr[i] = T();
+    }
     return ptr;
 }
 
 template <typename T>
 void deallocate(T *ptr)
 {
+    if (!ptr) return;
+    
+#ifdef MALLOC_IS_ALIGNED
+    free((void *)ptr);
+#else /* !MALLOC_IS_ALIGNED */
+
+#ifdef HAVE__ALIGNED_MALLOC
+    _aligned_free((void *)ptr);
+#else /* !HAVE__ALIGNED_MALLOC */
+
+#ifdef HAVE_POSIX_MEMALIGN
+    free((void *)ptr);
+#else /* !HAVE_POSIX_MEMALIGN */
+    
 #ifdef USE_OWN_ALIGNED_MALLOC
-    if (ptr) free(((void **)ptr)[-1]);
+    free(((void **)ptr)[-1]);
 #else /* !USE_OWN_ALIGNED_MALLOC */
-#ifdef __MSVC__
-    if (ptr) _aligned_free((void *)ptr);
-#else /* !__MSVC__ */
-    if (ptr) free((void *)ptr);
-#endif /* !__MSVC__ */
+
+#error "No aligned malloc available: define MALLOC_IS_ALIGNED to use system malloc, HAVE_POSIX_MEMALIGN if posix_memalign is available, HAVE__ALIGNED_MALLOC if _aligned_malloc is available, or USE_OWN_ALIGNED_MALLOC to roll our own"
+
 #endif /* !USE_OWN_ALIGNED_MALLOC */
+#endif /* !HAVE_POSIX_MEMALIGN */
+#endif /* !HAVE__ALIGNED_MALLOC */
+#endif /* !MALLOC_IS_ALIGNED */
 }
 
 #ifdef HAVE_IPP
@@ -164,7 +245,11 @@
 {
     T *newptr = allocate<T>(count);
     if (oldcount && ptr) {
-        v_copy(newptr, ptr, oldcount < count ? oldcount : count);
+        size_t tocopy = oldcount;
+        if (count < oldcount) tocopy = count;
+        for (size_t i = 0; i < tocopy; ++i) {
+            newptr[i] = ptr[i];
+        }
     }
     if (ptr) deallocate<T>(ptr);
     return newptr;
@@ -175,7 +260,9 @@
 T *reallocate_and_zero(T *ptr, size_t oldcount, size_t count)
 {
     ptr = reallocate(ptr, oldcount, count);
-    v_zero(ptr, count);
+    for (size_t i = 0; i < count; ++i) {
+        ptr[i] = T();
+    }
     return ptr;
 }
 	
@@ -184,7 +271,11 @@
 T *reallocate_and_zero_extension(T *ptr, size_t oldcount, size_t count)
 {
     ptr = reallocate(ptr, oldcount, count);
-    if (count > oldcount) v_zero(ptr + oldcount, count - oldcount);
+    if (count > oldcount) {
+        for (size_t i = oldcount; i < count; ++i) {
+            ptr[i] = T();
+        }
+    }
     return ptr;
 }
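[Editor's note: a brief sketch of how the three reallocation variants differ, assuming a growing buffer; sizes are illustrative:]

    #include "bqvec/Allocators.h"

    void grow()
    {
        float *buf = breakfastquay::allocate_and_zero<float>(256);
        // reallocate: preserves the first 256 values, leaves the rest
        //   default-initialised
        // reallocate_and_zero: zeroes all 512 values
        // reallocate_and_zero_extension: preserves 256, zeroes the rest
        buf = breakfastquay::reallocate_and_zero_extension(buf, 256, 512);
        breakfastquay::deallocate(buf);
    }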
 
@@ -229,7 +320,11 @@
 {
     T **newptr = allocate_channels<T>(channels, count);
     if (oldcount && ptr) {
-        v_copy_channels(newptr, ptr, channels, oldcount < count ? oldcount : count);
+        for (size_t c = 0; c < channels; ++c) {
+            for (size_t i = 0; i < oldcount && i < count; ++i) {
+                newptr[c][i] = ptr[c][i];
+            }
+        }
     } 
     if (ptr) deallocate_channels<T>(ptr, oldchannels);
     return newptr;
@@ -242,23 +337,121 @@
 {
     T **newptr = allocate_and_zero_channels<T>(channels, count);
     if (oldcount && ptr) {
-        v_copy_channels(newptr, ptr, channels, oldcount < count ? oldcount : count);
+        for (size_t c = 0; c < channels; ++c) {
+            for (size_t i = 0; i < oldcount && i < count; ++i) {
+                newptr[c][i] = ptr[c][i];
+            }
+        }
     } 
     if (ptr) deallocate_channels<T>(ptr, oldchannels);
     return newptr;
 }
 
-/// RAII class to call deallocate() on destruction
+
+/** Trivial RAII class to call deallocate() on destruction.
+ */
 template <typename T>
 class Deallocator
 {
 public:
     Deallocator(T *t) : m_t(t) { }
     ~Deallocator() { deallocate<T>(m_t); }
+
 private:
     T *m_t;
 };
 
+
+/** Allocator for use with STL classes, e.g. vector, to ensure
+ *  alignment.  Based on example code by Stephan T. Lavavej.
+ *
+ *  e.g. std::vector<float, StlAllocator<float> > v;
+ */
+template <typename T>
+class StlAllocator
+{
+public:
+    typedef T *pointer;
+    typedef const T *const_pointer;
+    typedef T &reference;
+    typedef const T &const_reference;
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+
+    StlAllocator() { }
+    StlAllocator(const StlAllocator&) { }
+    template <typename U> StlAllocator(const StlAllocator<U>&) { }
+    ~StlAllocator() { }
+
+    T *
+    allocate(const size_t n) const {
+        if (n == 0) return 0;
+        if (n > max_size()) {
+#ifndef NO_EXCEPTIONS
+            throw std::length_error("Size overflow in StlAllocator::allocate()");
+#else
+            abort();
+#endif
+        }
+        return ::breakfastquay::allocate<T>(n);
+    }
+
+    void
+    deallocate(T *const p, const size_t) const {
+        ::breakfastquay::deallocate(p);
+    }
+
+    template <typename U>
+    T *
+    allocate(const size_t n, const U *) const {
+        return allocate(n);
+    }
+
+    T *
+    address(T &r) const {
+        return &r;
+    }
+
+    const T *
+    address(const T &s) const {
+        return &s;
+    }
+
+    size_t
+    max_size() const {
+        return (static_cast<size_t>(0) - static_cast<size_t>(1)) / sizeof(T);
+    }
+
+    template <typename U> struct rebind {
+        typedef StlAllocator<U> other;
+    };
+    
+    bool
+    operator==(const StlAllocator &) const {
+        return true;
+    }
+
+    bool
+    operator!=(const StlAllocator &) const {
+        return false;
+    }
+
+    void
+    construct(T *const p, const T &t) const {
+        void *const pv = static_cast<void *>(p);
+        new (pv) T(t);
+    }
+
+    void
+    destroy(T *const p) const {
+        p->~T();
+    }
+
+private:
+    StlAllocator& operator=(const StlAllocator&);
+};
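[Editor's note: expanding slightly on the class comment's example -- a sketch in C++98 spelling, hence &v[0] rather than data(); v_zero comes from VectorOps.h:]

    #include <vector>
    #include "bqvec/Allocators.h"
    #include "bqvec/VectorOps.h"

    void stl_example()
    {
        // Storage comes from the aligned allocator, so the contents
        // can safely be handed to the vector ops.
        std::vector<float, breakfastquay::StlAllocator<float> > v;
        v.resize(1024);
        breakfastquay::v_zero(&v[0], 1024);
    }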
+
 }
 
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/bqvec/Barrier.h	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,45 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    bqvec
+
+    A small library for vector arithmetic and allocation in C++ using
+    raw C pointer arrays.
+
+    Copyright 2007-2017 Particular Programs Ltd.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use, copy,
+    modify, merge, publish, distribute, sublicense, and/or sell copies
+    of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+    Except as contained in this notice, the names of Chris Cannam and
+    Particular Programs Ltd shall not be used in advertising or
+    otherwise to promote the sale, use or other dealings in this
+    Software without prior written authorization.
+*/
+
+#ifndef BQVEC_BARRIER_H
+#define BQVEC_BARRIER_H
+
+namespace breakfastquay {
+    extern void system_memorybarrier();
+}
+
+#define BQ_MBARRIER() ::breakfastquay::system_memorybarrier()
+
+#endif
--- a/bqvec/bqvec/ComplexTypes.h	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/bqvec/ComplexTypes.h	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2015 Particular Programs Ltd.
+    Copyright 2007-2017 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -39,12 +39,19 @@
 namespace breakfastquay {
 
 #ifndef NO_COMPLEX_TYPES
+
+#ifdef USE_SINGLE_PRECISION_COMPLEX
+typedef float bq_complex_element_t;
+#else
+typedef double bq_complex_element_t;
+#endif
+
 // Convertible with other complex types that store re+im consecutively
-typedef double bq_complex_element_t;
 struct bq_complex_t {
     bq_complex_element_t re;
     bq_complex_element_t im;
 };
+
 #endif
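[Editor's note: to illustrate the "convertible" remark above -- a sketch assuming the default double-precision build; the helper function is mine, not part of the header:]

    #include <complex>
    #include "bqvec/ComplexTypes.h"

    std::complex<double> as_std(const breakfastquay::bq_complex_t &c)
    {
        // Both types store re and im consecutively, so member-wise
        // conversion (or buffer-level reinterpretation) preserves layout.
        return std::complex<double>(c.re, c.im);
    }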
 
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/bqvec/Range.h	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,57 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    bqvec
+
+    A small library for vector arithmetic and allocation in C++ using
+    raw C pointer arrays.
+
+    Copyright 2007-2017 Particular Programs Ltd.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use, copy,
+    modify, merge, publish, distribute, sublicense, and/or sell copies
+    of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+    Except as contained in this notice, the names of Chris Cannam and
+    Particular Programs Ltd shall not be used in advertising or
+    otherwise to promote the sale, use or other dealings in this
+    Software without prior written authorization.
+*/
+
+#ifndef BQVEC_RANGE_H
+#define BQVEC_RANGE_H
+
+namespace breakfastquay {
+
+/** Check whether an integer index is in range for a container,
+    avoiding overflows and signed/unsigned comparison warnings.
+*/
+template<typename T, typename C>
+bool in_range_for(const C &container, T i)
+{
+    if (i < 0) return false;
+    if (sizeof(T) > sizeof(typename C::size_type)) {
+	return i < static_cast<T>(container.size());
+    } else {
+	return static_cast<typename C::size_type>(i) < container.size();
+    }
+}
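[Editor's note: for example:]

    #include <vector>
    #include "bqvec/Range.h"

    bool check()
    {
        std::vector<float> v(16);
        long i = -1;
        // false: i is negative, so v[i] would be out of range
        return breakfastquay::in_range_for(v, i);
    }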
+
+}
+
+#endif
--- a/bqvec/bqvec/Restrict.h	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/bqvec/Restrict.h	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2015 Particular Programs Ltd.
+    Copyright 2007-2017 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -36,13 +36,21 @@
 #ifndef BQVEC_RESTRICT_H
 #define BQVEC_RESTRICT_H
 
+#ifdef _MSC_VER
+#define BQ_R__ __restrict
+#else
 #ifdef __MSVC__
 #define BQ_R__ __restrict
 #endif
+#endif
 
+#ifdef __clang__
+#define BQ_R__ __restrict__
+#else
 #ifdef __GNUC__
 #define BQ_R__ __restrict__
 #endif
+#endif
 
 #ifndef BQ_R__
 #define BQ_R__
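[Editor's note: BQ_R__ expands to the compiler's restrict extension where available, or to nothing. A sketch of the promise it declares -- the function name is illustrative:]

    #include "bqvec/Restrict.h"

    // BQ_R__ tells the compiler that dst and src never alias, which is
    // what lets a loop like this auto-vectorise cleanly.
    void gain_into(float *const BQ_R__ dst,
                   const float *const BQ_R__ src,
                   float gain, int n)
    {
        for (int i = 0; i < n; ++i) {
            dst[i] = src[i] * gain;
        }
    }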
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/bqvec/RingBuffer.h	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,518 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    bqvec
+
+    A small library for vector arithmetic and allocation in C++ using
+    raw C pointer arrays.
+
+    Copyright 2007-2017 Particular Programs Ltd.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use, copy,
+    modify, merge, publish, distribute, sublicense, and/or sell copies
+    of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+    Except as contained in this notice, the names of Chris Cannam and
+    Particular Programs Ltd shall not be used in advertising or
+    otherwise to promote the sale, use or other dealings in this
+    Software without prior written authorization.
+*/
+
+#ifndef BQVEC_RINGBUFFER_H
+#define BQVEC_RINGBUFFER_H
+
+//#define DEBUG_RINGBUFFER 1
+
+#include "Barrier.h"
+#include "Allocators.h"
+#include "Restrict.h"
+#include "VectorOps.h"
+
+#include <sys/types.h>
+
+#include <iostream>
+
+namespace breakfastquay {
+
+/**
+ * RingBuffer implements a lock-free ring buffer for one writer and
+ * one reader, that is to be used to store a sample type T.
+ *
+ * RingBuffer is thread-safe provided only one thread writes and only
+ * one thread reads.
+ */
+template <typename T>
+class RingBuffer
+{
+public:
+    /**
+     * Create a ring buffer with room to write n samples.
+     *
+     * Note that the internal storage size will actually be n+1
+     * samples, as one element is unavailable for administrative
+     * reasons.  Since the ring buffer performs best if its size is a
+     * power of two, this means n should ideally be some power of two
+     * minus one.
+     */
+    RingBuffer(int n);
+
+    virtual ~RingBuffer();
+
+    /**
+     * Return the total capacity of the ring buffer in samples.
+     * (This is the argument n passed to the constructor.)
+     */
+    int getSize() const;
+
+    /**
+     * Return a new ring buffer (allocated with "new" -- caller must
+     * delete when no longer needed) of the given size, containing the
+     * same data as this one.  If another thread reads from or writes
+     * to this buffer during the call, the results may be incomplete
+     * or inconsistent.  If this buffer's data will not fit in the new
+     * size, the contents are undefined.
+     */
+    RingBuffer<T> *resized(int newSize) const;
+
+    /**
+     * Reset read and write pointers, thus emptying the buffer.
+     * Should be called from the write thread.
+     */
+    void reset();
+
+    /**
+     * Return the amount of data available for reading, in samples.
+     */
+    int getReadSpace() const;
+
+    /**
+     * Return the amount of space available for writing, in samples.
+     */
+    int getWriteSpace() const;
+
+    /**
+     * Read n samples from the buffer.  If fewer than n are available,
+     * the remainder will be zeroed out.  Returns the number of
+     * samples actually read.
+     *
+     * This is a template function, taking an argument S for the target
+     * sample type, which is permitted to differ from T if the two
+     * types are compatible for arithmetic operations.
+     */
+    template <typename S>
+    int read(S *const BQ_R__ destination, int n);
+
+    /**
+     * Read n samples from the buffer, adding them to the destination.
+     * If fewer than n are available, the remainder will be left
+     * alone.  Returns the number of samples actually read.
+     *
+     * This is a template function, taking an argument S for the target
+     * sample type, which is permitted to differ from T if the two
+     * types are compatible for arithmetic operations.
+     */
+    template <typename S>
+    int readAdding(S *const BQ_R__ destination, int n);
+
+    /**
+     * Read one sample from the buffer.  If no sample is available,
+     * this will silently return zero.  Calling this repeatedly is
+     * obviously slower than calling read once, but it may be good
+     * enough if you don't want to allocate a buffer to read into.
+     */
+    T readOne();
+
+    /**
+     * Read n samples from the buffer, if available, without advancing
+     * the read pointer -- i.e. a subsequent read() or skip() will be
+     * necessary to empty the buffer.  If fewer than n are available,
+     * the remainder will be zeroed out.  Returns the number of
+     * samples actually read.
+     */
+    int peek(T *const BQ_R__ destination, int n) const;
+
+    /**
+     * Read one sample from the buffer, if available, without
+     * advancing the read pointer -- i.e. a subsequent read() or
+     * skip() will be necessary to empty the buffer.  Returns zero if
+     * no sample was available.
+     */
+    T peekOne() const;
+
+    /**
+     * Pretend to read n samples from the buffer, without actually
+     * returning them (i.e. discard the next n samples).  Returns the
+     * number of samples actually available for discarding.
+     */
+    int skip(int n);
+
+    /**
+     * Write n samples to the buffer.  If insufficient space is
+     * available, not all samples may actually be written.  Returns
+     * the number of samples actually written.
+     *
+     * This is a template function, taking an argument S for the source
+     * sample type, which is permitted to differ from T if the two
+     * types are compatible for assignment.
+     */
+    template <typename S>
+    int write(const S *const BQ_R__ source, int n);
+
+    /**
+     * Write n zero-value samples to the buffer.  If insufficient
+     * space is available, not all zeros may actually be written.
+     * Returns the number of zeroes actually written.
+     */
+    int zero(int n);
+
+protected:
+    T *const BQ_R__ m_buffer;
+    int             m_writer;
+    int             m_reader;
+    const int       m_size;
+
+    int readSpaceFor(int w, int r) const {
+        int space;
+        if (w > r) space = w - r;
+        else if (w < r) space = (w + m_size) - r;
+        else space = 0;
+        return space;
+    }
+
+    int writeSpaceFor(int w, int r) const {
+        int space = (r + m_size - w - 1);
+        if (space >= m_size) space -= m_size;
+        return space;
+    }
+
+private:
+    RingBuffer(const RingBuffer &); // not provided
+    RingBuffer &operator=(const RingBuffer &); // not provided
+};
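[Editor's note: a minimal usage sketch -- one writer thread, one reader thread, as the class comment requires; sizes are illustrative:]

    #include "bqvec/RingBuffer.h"

    breakfastquay::RingBuffer<float> rb(1023); // storage 1024, a power of two

    void writer_thread(const float *in)   // call only from the one writer
    {
        int accepted = rb.write(in, 256); // returns how many fitted
        (void)accepted;
    }

    void reader_thread(float *out)        // call only from the one reader
    {
        int got = rb.read(out, 256);      // returns how many were read
        (void)got;
    }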
+
+template <typename T>
+RingBuffer<T>::RingBuffer(int n) :
+    m_buffer(allocate<T>(n + 1)),
+    m_writer(0),
+    m_size(n + 1)
+{
+#ifdef DEBUG_RINGBUFFER
+    std::cerr << "RingBuffer<T>[" << this << "]::RingBuffer(" << n << ")" << std::endl;
+#endif
+
+    m_reader = 0;
+}
+
+template <typename T>
+RingBuffer<T>::~RingBuffer()
+{
+#ifdef DEBUG_RINGBUFFER
+    std::cerr << "RingBuffer<T>[" << this << "]::~RingBuffer" << std::endl;
+#endif
+
+    deallocate(m_buffer);
+}
+
+template <typename T>
+int
+RingBuffer<T>::getSize() const
+{
+#ifdef DEBUG_RINGBUFFER
+    std::cerr << "RingBuffer<T>[" << this << "]::getSize(): " << m_size-1 << std::endl;
+#endif
+
+    return m_size - 1;
+}
+
+template <typename T>
+RingBuffer<T> *
+RingBuffer<T>::resized(int newSize) const
+{
+    RingBuffer<T> *newBuffer = new RingBuffer<T>(newSize);
+
+    int w = m_writer;
+    int r = m_reader;
+
+    while (r != w) {
+        T value = m_buffer[r];
+        newBuffer->write(&value, 1);
+        if (++r == m_size) r = 0;
+    }
+
+    return newBuffer;
+}
+
+template <typename T>
+void
+RingBuffer<T>::reset()
+{
+#ifdef DEBUG_RINGBUFFER
+    std::cerr << "RingBuffer<T>[" << this << "]::reset" << std::endl;
+#endif
+
+    m_reader = m_writer;
+}
+
+template <typename T>
+int
+RingBuffer<T>::getReadSpace() const
+{
+    return readSpaceFor(m_writer, m_reader);
+}
+
+template <typename T>
+int
+RingBuffer<T>::getWriteSpace() const
+{
+    return writeSpaceFor(m_writer, m_reader);
+}
+
+template <typename T>
+template <typename S>
+int
+RingBuffer<T>::read(S *const BQ_R__ destination, int n)
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    int available = readSpaceFor(w, r);
+    if (n > available) {
+	std::cerr << "WARNING: RingBuffer::read: " << n << " requested, only "
+                  << available << " available" << std::endl;
+	memset(destination + available, 0, (n - available) * sizeof(S));
+	n = available;
+    }
+    if (n == 0) return n;
+
+    int here = m_size - r;
+    T *const BQ_R__ bufbase = m_buffer + r;
+
+    if (here >= n) {
+        v_convert(destination, bufbase, n);
+    } else {
+        v_convert(destination, bufbase, here);
+        v_convert(destination + here, m_buffer, n - here);
+    }
+
+    r += n;
+    while (r >= m_size) r -= m_size;
+
+    BQ_MBARRIER();
+    m_reader = r;
+
+    return n;
+}
+
+template <typename T>
+template <typename S>
+int
+RingBuffer<T>::readAdding(S *const BQ_R__ destination, int n)
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    int available = readSpaceFor(w, r);
+    if (n > available) {
+	std::cerr << "WARNING: RingBuffer::readAdding: " << n << " requested, only "
+                  << available << " available" << std::endl;
+	n = available;
+    }
+    if (n == 0) return n;
+
+    int here = m_size - r;
+    T *const BQ_R__ bufbase = m_buffer + r;
+
+    if (here >= n) {
+        v_add(destination, bufbase, n);
+    } else {
+        v_add(destination, bufbase, here);
+        v_add(destination + here, m_buffer, n - here);
+    }
+
+    r += n;
+    while (r >= m_size) r -= m_size;
+
+    BQ_MBARRIER();
+    m_reader = r;
+
+    return n;
+}
+
+template <typename T>
+T
+RingBuffer<T>::readOne()
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    if (w == r) {
+	std::cerr << "WARNING: RingBuffer::readOne: no sample available"
+		  << std::endl;
+	return T();
+    }
+
+    T value = m_buffer[r];
+    if (++r == m_size) r = 0;
+
+    BQ_MBARRIER();
+    m_reader = r;
+
+    return value;
+}
+
+template <typename T>
+int
+RingBuffer<T>::peek(T *const BQ_R__ destination, int n) const
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    int available = readSpaceFor(w, r);
+    if (n > available) {
+	std::cerr << "WARNING: RingBuffer::peek: " << n << " requested, only "
+                  << available << " available" << std::endl;
+	memset(destination + available, 0, (n - available) * sizeof(T));
+	n = available;
+    }
+    if (n == 0) return n;
+
+    int here = m_size - r;
+    const T *const BQ_R__ bufbase = m_buffer + r;
+
+    if (here >= n) {
+        v_copy(destination, bufbase, n);
+    } else {
+        v_copy(destination, bufbase, here);
+        v_copy(destination + here, m_buffer, n - here);
+    }
+
+    return n;
+}
+
+template <typename T>
+T
+RingBuffer<T>::peekOne() const
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    if (w == r) {
+	std::cerr << "WARNING: RingBuffer::peekOne: no sample available"
+		  << std::endl;
+	return T();
+    }
+
+    T value = m_buffer[r];
+    return value;
+}
+
+template <typename T>
+int
+RingBuffer<T>::skip(int n)
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    int available = readSpaceFor(w, r);
+    if (n > available) {
+	std::cerr << "WARNING: RingBuffer::skip: " << n << " requested, only "
+                  << available << " available" << std::endl;
+	n = available;
+    }
+    if (n == 0) return n;
+
+    r += n;
+    while (r >= m_size) r -= m_size;
+
+    // No memory barrier required, because we didn't read any data
+    m_reader = r;
+
+    return n;
+}
+
+template <typename T>
+template <typename S>
+int
+RingBuffer<T>::write(const S *const BQ_R__ source, int n)
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    int available = writeSpaceFor(w, r);
+    if (n > available) {
+	std::cerr << "WARNING: RingBuffer::write: " << n
+                  << " requested, only room for " << available << std::endl;
+	n = available;
+    }
+    if (n == 0) return n;
+
+    int here = m_size - w;
+    T *const BQ_R__ bufbase = m_buffer + w;
+
+    if (here >= n) {
+        v_convert<S, T>(bufbase, source, n);
+    } else {
+        v_convert<S, T>(bufbase, source, here);
+        v_convert<S, T>(m_buffer, source + here, n - here);
+    }
+
+    w += n;
+    while (w >= m_size) w -= m_size;
+
+    BQ_MBARRIER();
+    m_writer = w;
+
+    return n;
+}
+
+template <typename T>
+int
+RingBuffer<T>::zero(int n)
+{
+    int w = m_writer;
+    int r = m_reader;
+
+    int available = writeSpaceFor(w, r);
+    if (n > available) {
+	std::cerr << "WARNING: RingBuffer::zero: " << n
+                  << " requested, only room for " << available << std::endl;
+	n = available;
+    }
+    if (n == 0) return n;
+
+    int here = m_size - w;
+    T *const BQ_R__ bufbase = m_buffer + w;
+
+    if (here >= n) {
+        v_zero(bufbase, n);
+    } else {
+        v_zero(bufbase, here);
+        v_zero(m_buffer, n - here);
+    }
+
+    w += n;
+    while (w >= m_size) w -= m_size;
+
+    BQ_MBARRIER();
+    m_writer = w;
+
+    return n;
+}
+
+}
+
+#endif // BQVEC_RINGBUFFER_H
--- a/bqvec/bqvec/VectorOps.h	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/bqvec/VectorOps.h	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2015 Particular Programs Ltd.
+    Copyright 2007-2016 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -40,15 +40,20 @@
 #ifndef _MSC_VER
 #include <inttypes.h>
 #endif
+#include <ippversion.h>
 #include <ipps.h>
+#if (IPP_VERSION_MAJOR <= 7)
+// Deprecated in v8, removed in v9
 #include <ippac.h>
 #endif
+#endif
 
 #ifdef HAVE_VDSP
 #include <Accelerate/Accelerate.h>
 #endif
 
 #include <cstring>
+#include <cmath>
 
 #include "Restrict.h"
 
@@ -59,9 +64,17 @@
  * 
  * Write basic vector-manipulation loops in such a way as to promote
  * the likelihood that a good current C++ compiler can auto-vectorize
- * them (e.g. gcc-4.x with -ftree-vectorize). Provide calls out to
- * supported vector libraries (e.g. IPP, Accelerate) where useful.
- * No intrinsics or assembly.
+ * them (e.g. gcc-4+ with -ftree-vectorize/-O3). Provide calls out to
+ * supported vector libraries (e.g. IPP, Accelerate) where useful.  No
+ * intrinsics or assembly.
+ *
+ * Size and index arguments are plain machine ints, to facilitate
+ * compiler optimization and vectorization. In general these functions
+ * should be used only with buffers whose sizes are calculated from
+ * known processing parameters and that are known to be much smaller
+ * than 32-bit int range. For security reasons you should not use
+ * these functions with buffers whose sizes may be under control of
+ * the user or external input.
  *
  * Argument ordering:
  *
@@ -71,11 +84,13 @@
  * if present (i.e. argument ordering follows memcpy).
  * 
  * The final argument is always a count of the number of elements in
- * each vector. 
+ * each vector.
  *
  * Some functions operate on a set of vectors at once: their names all
  * contain the text _channels, and the number of channels (i.e. number
- * of vectors) is the argument before last.
+ * of vectors) is the argument before last. If the number of input and
+ * output channels may differ, the output channel count immediately
+ * follows the output channel pointer.
  *
  * Any additional arguments, e.g. scale factors, appear between the
  * vector pointers at the start and the counts at the end.
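[Editor's note: to make the ordering conventions concrete, a sketch of some call shapes; the wrapper function is illustrative, the called names are from this header:]

    #include "bqvec/VectorOps.h"

    using namespace breakfastquay;

    void conventions(float *const BQ_R__ dst,
                     const float *const BQ_R__ src,
                     float *const BQ_R__ *const BQ_R__ cdst,
                     const float *const BQ_R__ *const BQ_R__ csrc,
                     int channels, int n)
    {
        v_copy(dst, src, n);           // destination first, as with memcpy
        v_scale(dst, 0.5f, n);         // extra argument before the count
        v_copy_channels(cdst, csrc, channels, n); // channels before count
    }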
@@ -127,7 +142,7 @@
 {
     vDSP_vclrD(vec, 1, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_zero_channels
@@ -194,7 +209,7 @@
 {
     ippsCopy_64f(src, dst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_copy_channels
@@ -247,7 +262,30 @@
 {
     ippsMove_64f(src, dst, count);
 }
-#endif
+#endif // HAVE_IPP
+
+/**
+ * v_move_channels
+ *
+ * Copy the contents of the individual vectors in the set \arg src to
+ * the corresponding vectors in the set \arg dst. All vectors have
+ * length \arg count and there are \arg channels vectors in each set.
+ *
+ * The source vectors may overlap with the target vectors of
+ * corresponding channels. That is, src[i] may overlap with dst[i] but
+ * not with any other source or target vector. (If you know that no
+ * vectors can overlap, use v_copy instead.)
+ */
+template<typename T>
+inline void v_move_channels(T *const *const dst,       // not BQ_R__ (aliased)
+                            const T *const *const src, // not BQ_R__ (aliased)
+                            const int channels,
+                            const int count)
+{
+    for (int c = 0; c < channels; ++c) {
+        v_move(dst[c], src[c], count);
+    }
+}
 
 /**
  * v_convert
@@ -314,7 +352,7 @@
 {
     vDSP_vdpsp((double *)src, 1, dst, 1, count);
 }
-#endif
+#endif // HAVE_VDSP
 
 /**
  * v_convert_channels
@@ -344,71 +382,55 @@
  * v_add
  *
  * Add the elements in the vector \arg src to the corresponding
- * elements in the vector \arg dst, both of length arg \count, leaving
- * the result in \arg dst.
+ * elements in the vector \arg srcdst, both of length \arg count, leaving
+ * the result in \arg srcdst.
  *
- * Caller guarantees that \arg src and \arg dst are non-overlapping.
+ * Caller guarantees that \arg src and \arg srcdst are non-overlapping.
  */
 template<typename T>
-inline void v_add(T *const BQ_R__ dst,
+inline void v_add(T *const BQ_R__ srcdst,
                   const T *const BQ_R__ src,
                   const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] += src[i];
-    }
-}
-
-/**
- * v_add
- *
- * Add the constant \arg value to every element of the vector \arg
- * dst, of length arg \count, leaving the result in \arg dst.
- */
-template<typename T>
-inline void v_add(T *const BQ_R__ dst,
-                  const T value,
-                  const int count)
-{
-    for (int i = 0; i < count; ++i) {
-        dst[i] += value;
+        srcdst[i] += src[i];
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_add(float *const BQ_R__ dst,
+inline void v_add(float *const BQ_R__ srcdst,
                   const float *const BQ_R__ src,
                   const int count)
 {
-    ippsAdd_32f_I(src, dst, count);
+    ippsAdd_32f_I(src, srcdst, count);
 }    
-inline void v_add(double *const BQ_R__ dst,
+inline void v_add(double *const BQ_R__ srcdst,
                   const double *const BQ_R__ src,
                   const int count)
 {
-    ippsAdd_64f_I(src, dst, count);
+    ippsAdd_64f_I(src, srcdst, count);
 }    
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_add_channels
  *
  * Add the elements in the individual vectors in the set \arg src to
  * the corresponding elements of the corresponding vectors in \arg
- * dst, leaving the results in \arg dst. All vectors have length \arg
+ * srcdst, leaving the results in \arg srcdst. All vectors have length \arg
  * count, and there are \arg channels vectors in each set.
  *
- * Caller guarantees that all of the \arg src and \arg dst vectors are
+ * Caller guarantees that all of the \arg src and \arg srcdst vectors are
  * non-overlapping with each other.
  */
 template<typename T>
-inline void v_add_channels(T *const BQ_R__ *const BQ_R__ dst,
+inline void v_add_channels(T *const BQ_R__ *const BQ_R__ srcdst,
                            const T *const BQ_R__ *const BQ_R__ src,
                            const int channels, const int count)
 {
     for (int c = 0; c < channels; ++c) {
-        v_add(dst[c], src[c], count);
+        v_add(srcdst[c], src[c], count);
     }
 }
 
@@ -417,19 +439,19 @@
  *
  * Add the elements in the vector \arg src, each multiplied by the
  * constant factor \arg gain, to the corresponding elements in the
- * vector \arg dst, both of length arg \count, leaving the result in
- * \arg dst.
+ * vector \arg srcdst, both of length \arg count, leaving the result in
+ * \arg srcdst.
  *
- * Caller guarantees that \arg src and \arg dst are non-overlapping.
+ * Caller guarantees that \arg src and \arg srcdst are non-overlapping.
  */
 template<typename T, typename G>
-inline void v_add_with_gain(T *const BQ_R__ dst,
+inline void v_add_with_gain(T *const BQ_R__ srcdst,
                             const T *const BQ_R__ src,
                             const G gain,
                             const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] += src[i] * gain;
+        srcdst[i] += src[i] * gain;
     }
 }
 
@@ -438,22 +460,22 @@
  *
  * Add the elements in the individual vectors in the set \arg src,
  * each multiplied by the constant factor \arg gain, to the
- * corresponding elements of the corresponding vectors in \arg dst,
- * leaving the results in \arg dst. All vectors have length \arg
+ * corresponding elements of the corresponding vectors in \arg srcdst,
+ * leaving the results in \arg srcdst. All vectors have length \arg
  * count, and there are \arg channels vectors in each set.
  *
- * Caller guarantees that all of the \arg src and \arg dst vectors are
+ * Caller guarantees that all of the \arg src and \arg srcdst vectors are
  * non-overlapping with each other.
  */
 template<typename T, typename G>
-inline void v_add_channels_with_gain(T *const BQ_R__ *const BQ_R__ dst,
+inline void v_add_channels_with_gain(T *const BQ_R__ *const BQ_R__ srcdst,
                                      const T *const BQ_R__ *const BQ_R__ src,
                                      const G gain,
                                      const int channels,
                                      const int count)
 {
     for (int c = 0; c < channels; ++c) {
-        v_add_with_gain(dst[c], src[c], gain, count);
+        v_add_with_gain(srcdst[c], src[c], gain, count);
     }
 }
 
@@ -461,105 +483,121 @@
  * v_subtract
  *
  * Subtract the elements in the vector \arg src from the corresponding
- * elements in the vector \arg dst, both of length arg \count, leaving
- * the result in \arg dst.
+ * elements in the vector \arg srcdst, both of length \arg count, leaving
+ * the result in \arg srcdst.
  *
- * Caller guarantees that \arg src and \arg dst are non-overlapping.
+ * Caller guarantees that \arg src and \arg srcdst are non-overlapping.
  */
 template<typename T>
-inline void v_subtract(T *const BQ_R__ dst,
+inline void v_subtract(T *const BQ_R__ srcdst,
                        const T *const BQ_R__ src,
                        const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] -= src[i];
+        srcdst[i] -= src[i];
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_subtract(float *const BQ_R__ dst,
+inline void v_subtract(float *const BQ_R__ srcdst,
                        const float *const BQ_R__ src,
                        const int count)
 {
-    ippsSub_32f_I(src, dst, count);
+    ippsSub_32f_I(src, srcdst, count);
 }    
-inline void v_subtract(double *const BQ_R__ dst,
+inline void v_subtract(double *const BQ_R__ srcdst,
                        const double *const BQ_R__ src,
                        const int count)
 {
-    ippsSub_64f_I(src, dst, count);
+    ippsSub_64f_I(src, srcdst, count);
 }    
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_scale
  *
- * Scale the elements in the vector \arg dst, of length \arg count, by
+ * Scale the elements in the vector \arg srcdst, of length \arg count, by
  * the factor \arg gain.
  */
 template<typename T, typename G>
-inline void v_scale(T *const BQ_R__ dst,
+inline void v_scale(T *const BQ_R__ srcdst,
                     const G gain,
                     const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] *= gain;
+        srcdst[i] *= gain;
     }
 }
 
 #if defined HAVE_IPP 
 template<>
-inline void v_scale(float *const BQ_R__ dst,
+inline void v_scale(float *const BQ_R__ srcdst,
                     const float gain,
                     const int count)
 {
-    ippsMulC_32f_I(gain, dst, count);
+    ippsMulC_32f_I(gain, srcdst, count);
 }
 template<>
-inline void v_scale(double *const BQ_R__ dst,
+inline void v_scale(double *const BQ_R__ srcdst,
                     const double gain,
                     const int count)
 {
-    ippsMulC_64f_I(gain, dst, count);
+    ippsMulC_64f_I(gain, srcdst, count);
 }
-#endif
+#endif // HAVE_IPP
+
+/**
+ * v_increment
+ *
+ * Add a constant quantity \arg incr to all of the elements in the vector
+ * \arg srcdst, of length \arg count.
+ */
+template<typename T, typename G>
+inline void v_increment(T *const BQ_R__ srcdst,
+                        const G incr,
+                        const int count)
+{
+    for (int i = 0; i < count; ++i) {
+        srcdst[i] += T(incr);
+    }
+}
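[Editor's note: for instance, applying a fixed DC offset -- a sketch, with an arbitrary offset value:]

    void add_dc(float *const BQ_R__ buf, int n)
    {
        breakfastquay::v_increment(buf, 0.1f, n); // adds 0.1 to each element
    }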
 
 /**
  * v_multiply
  *
- * Multiply the elements in the vector \arg dst by the corresponding
+ * Multiply the elements in the vector \arg srcdst by the corresponding
 * elements in the vector \arg src, both of length \arg count, leaving
- * the result in \arg dst.
+ * the result in \arg srcdst.
  *
- * Caller guarantees that \arg src and \arg dst are non-overlapping.
+ * Caller guarantees that \arg src and \arg srcdst are non-overlapping.
  */
 template<typename T>
-inline void v_multiply(T *const BQ_R__ dst,
+inline void v_multiply(T *const BQ_R__ srcdst,
                        const T *const BQ_R__ src,
                        const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] *= src[i];
+        srcdst[i] *= src[i];
     }
 }
 
 #if defined HAVE_IPP 
 template<>
-inline void v_multiply(float *const BQ_R__ dst,
+inline void v_multiply(float *const BQ_R__ srcdst,
                        const float *const BQ_R__ src,
                        const int count)
 {
-    ippsMul_32f_I(src, dst, count);
+    ippsMul_32f_I(src, srcdst, count);
 }
 template<>
-inline void v_multiply(double *const BQ_R__ dst,
+inline void v_multiply(double *const BQ_R__ srcdst,
                        const double *const BQ_R__ src,
                        const int count)
 {
-    ippsMul_64f_I(src, dst, count);
+    ippsMul_64f_I(src, srcdst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_multiply
@@ -572,10 +610,10 @@
  * non-overlapping.
  */
 template<typename T>
-inline void v_multiply(T *const BQ_R__ dst,
-                       const T *const BQ_R__ src1,
-                       const T *const BQ_R__ src2,
-                       const int count)
+inline void v_multiply_to(T *const BQ_R__ dst,
+                          const T *const BQ_R__ src1,
+                          const T *const BQ_R__ src2,
+                          const int count)
 {
     for (int i = 0; i < count; ++i) {
         dst[i] = src1[i] * src2[i];
@@ -584,98 +622,98 @@
 
 #if defined HAVE_IPP 
 template<>
-inline void v_multiply(float *const BQ_R__ dst,
-                       const float *const BQ_R__ src1,
-                       const float *const BQ_R__ src2,
-                       const int count)
+inline void v_multiply_to(float *const BQ_R__ dst,
+                          const float *const BQ_R__ src1,
+                          const float *const BQ_R__ src2,
+                          const int count)
 {
     ippsMul_32f(src1, src2, dst, count);
 }    
 template<>
-inline void v_multiply(double *const BQ_R__ dst,
-                       const double *const BQ_R__ src1,
-                       const double *const BQ_R__ src2,
-                       const int count)
+inline void v_multiply_to(double *const BQ_R__ dst,
+                          const double *const BQ_R__ src1,
+                          const double *const BQ_R__ src2,
+                          const int count)
 {
     ippsMul_64f(src1, src2, dst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_divide
  *
- * Divide the elements in the vector \arg dst by the corresponding
+ * Divide the elements in the vector \arg srcdst by the corresponding
 * elements in the vector \arg src, both of length \arg count, leaving
- * the result in \arg dst.
+ * the result in \arg srcdst.
  *
- * Caller guarantees that \arg src and \arg dst are non-overlapping.
+ * Caller guarantees that \arg src and \arg srcdst are non-overlapping.
  */
 template<typename T>
-inline void v_divide(T *const BQ_R__ dst,
+inline void v_divide(T *const BQ_R__ srcdst,
                      const T *const BQ_R__ src,
                      const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] /= src[i];
+        srcdst[i] /= src[i];
     }
 }
 
 #if defined HAVE_IPP 
 template<>
-inline void v_divide(float *const BQ_R__ dst,
+inline void v_divide(float *const BQ_R__ srcdst,
                      const float *const BQ_R__ src,
                      const int count)
 {
-    ippsDiv_32f_I(src, dst, count);
+    ippsDiv_32f_I(src, srcdst, count);
 }
 template<>
-inline void v_divide(double *const BQ_R__ dst,
+inline void v_divide(double *const BQ_R__ srcdst,
                      const double *const BQ_R__ src,
                      const int count)
 {
-    ippsDiv_64f_I(src, dst, count);
+    ippsDiv_64f_I(src, srcdst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_multiply_and_add
  *
  * Multiply the corresponding elements of the vectors \arg src1 and
 * \arg src2, both of length \arg count, and add the results to the
- * corresponding elements of vector \arg dst.
+ * corresponding elements of vector \arg srcdst.
  *
- * Caller guarantees that \arg src1, \arg src2 and \arg dst are
+ * Caller guarantees that \arg src1, \arg src2 and \arg srcdst are
  * non-overlapping.
  */
 template<typename T>
-inline void v_multiply_and_add(T *const BQ_R__ dst,
+inline void v_multiply_and_add(T *const BQ_R__ srcdst,
                                const T *const BQ_R__ src1,
                                const T *const BQ_R__ src2,
                                const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] += src1[i] * src2[i];
+        srcdst[i] += src1[i] * src2[i];
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_multiply_and_add(float *const BQ_R__ dst,
+inline void v_multiply_and_add(float *const BQ_R__ srcdst,
                                const float *const BQ_R__ src1,
                                const float *const BQ_R__ src2,
                                const int count)
 {
-    ippsAddProduct_32f(src1, src2, dst, count);
+    ippsAddProduct_32f(src1, src2, srcdst, count);
 }
 template<>
-inline void v_multiply_and_add(double *const BQ_R__ dst,
+inline void v_multiply_and_add(double *const BQ_R__ srcdst,
                                const double *const BQ_R__ src1,
                                const double *const BQ_R__ src2,
                                const int count)
 {
-    ippsAddProduct_64f(src1, src2, dst, count);
+    ippsAddProduct_64f(src1, src2, srcdst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_sum
@@ -695,32 +733,53 @@
 }
 
 /**
+ * v_multiply_and_sum
+ *
+ * Multiply the corresponding elements of the vectors \arg src1 and
+ * \arg src2, both of length \arg count, sum the results, and return
+ * the sum as a scalar value.
+ *
+ * Caller guarantees that \arg src1 and \arg src2 are non-overlapping.
+ */
+template<typename T>
+inline T v_multiply_and_sum(const T *const BQ_R__ src1,
+                            const T *const BQ_R__ src2,
+                            const int count)
+{
+    T result = T();
+    for (int i = 0; i < count; ++i) {
+        result += src1[i] * src2[i];
+    }
+    return result;
+}
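+
+// An illustrative usage sketch (editor's addition, not library code):
+// v_multiply_and_sum is an ordinary dot product.
+//
+//     double a[3] = { 1.0, 2.0, 3.0 };
+//     double b[3] = { 4.0, 5.0, 6.0 };
+//     double dot = v_multiply_and_sum(a, b, 3);    // 1*4 + 2*5 + 3*6 = 32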
+
+/**
  * v_log
  *
- * Replace each element in vector \arg dst, of length \arg count, with
+ * Replace each element in vector \arg srcdst, of length \arg count, with
  * its natural logarithm.
  */
 template<typename T>
-inline void v_log(T *const BQ_R__ dst,
+inline void v_log(T *const BQ_R__ srcdst,
                   const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] = log(dst[i]);
+        srcdst[i] = log(srcdst[i]);
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_log(float *const BQ_R__ dst,
+inline void v_log(float *const BQ_R__ srcdst,
                   const int count)
 {
-    ippsLn_32f_I(dst, count);
+    ippsLn_32f_I(srcdst, count);
 }
 template<>
-inline void v_log(double *const BQ_R__ dst,
+inline void v_log(double *const BQ_R__ srcdst,
                   const int count)
 {
-    ippsLn_64f_I(dst, count);
+    ippsLn_64f_I(srcdst, count);
 }
 #elif defined HAVE_VDSP
 // no in-place vForce functions for these -- can we use the
@@ -728,50 +787,50 @@
 // use an out-of-place one with temporary buffer and still be faster
 // than doing it any other way?
 template<>
-inline void v_log(float *const BQ_R__ dst,
+inline void v_log(float *const BQ_R__ srcdst,
                   const int count)
 {
     float tmp[count];
-    vvlogf(tmp, dst, &count);
-    v_copy(dst, tmp, count);
+    vvlogf(tmp, srcdst, &count);
+    v_copy(srcdst, tmp, count);
 }
 template<>
-inline void v_log(double *const BQ_R__ dst,
+inline void v_log(double *const BQ_R__ srcdst,
                   const int count)
 {
     double tmp[count];
-    vvlog(tmp, dst, &count);
-    v_copy(dst, tmp, count);
+    vvlog(tmp, srcdst, &count);
+    v_copy(srcdst, tmp, count);
 }
-#endif
+#endif // HAVE_VDSP
 
 /**
  * v_exp
  *
- * Replace each element in vector \arg dst, of length \arg count, with
+ * Replace each element in vector \arg srcdst, of length \arg count, with
  * its base-e exponential.
  */
 template<typename T>
-inline void v_exp(T *const BQ_R__ dst,
+inline void v_exp(T *const BQ_R__ srcdst,
                   const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] = exp(dst[i]);
+        srcdst[i] = exp(srcdst[i]);
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_exp(float *const BQ_R__ dst,
+inline void v_exp(float *const BQ_R__ srcdst,
                   const int count)
 {
-    ippsExp_32f_I(dst, count);
+    ippsExp_32f_I(srcdst, count);
 }
 template<>
-inline void v_exp(double *const BQ_R__ dst,
+inline void v_exp(double *const BQ_R__ srcdst,
                   const int count)
 {
-    ippsExp_64f_I(dst, count);
+    ippsExp_64f_I(srcdst, count);
 }
 #elif defined HAVE_VDSP
 // no in-place vForce functions for these -- can we use the
@@ -779,50 +838,50 @@
 // use an out-of-place one with temporary buffer and still be faster
 // than doing it any other way?
 template<>
-inline void v_exp(float *const BQ_R__ dst,
+inline void v_exp(float *const BQ_R__ srcdst,
                   const int count)
 {
     float tmp[count];
-    vvexpf(tmp, dst, &count);
-    v_copy(dst, tmp, count);
+    vvexpf(tmp, srcdst, &count);
+    v_copy(srcdst, tmp, count);
 }
 template<>
-inline void v_exp(double *const BQ_R__ dst,
+inline void v_exp(double *const BQ_R__ srcdst,
                   const int count)
 {
     double tmp[count];
-    vvexp(tmp, dst, &count);
-    v_copy(dst, tmp, count);
+    vvexp(tmp, srcdst, &count);
+    v_copy(srcdst, tmp, count);
 }
-#endif
+#endif // HAVE_VDSP
 
 /**
  * v_sqrt
  *
- * Replace each element in vector \arg dst, of length \arg count, with
+ * Replace each element in vector \arg srcdst, of length \arg count, with
  * its square root.
  */
 template<typename T>
-inline void v_sqrt(T *const BQ_R__ dst,
+inline void v_sqrt(T *const BQ_R__ srcdst,
                    const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] = sqrt(dst[i]);
+        srcdst[i] = sqrt(srcdst[i]);
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_sqrt(float *const BQ_R__ dst,
+inline void v_sqrt(float *const BQ_R__ srcdst,
                    const int count)
 {
-    ippsSqrt_32f_I(dst, count);
+    ippsSqrt_32f_I(srcdst, count);
 }
 template<>
-inline void v_sqrt(double *const BQ_R__ dst,
+inline void v_sqrt(double *const BQ_R__ srcdst,
                    const int count)
 {
-    ippsSqrt_64f_I(dst, count);
+    ippsSqrt_64f_I(srcdst, count);
 }
 #elif defined HAVE_VDSP
 // no in-place vForce functions for these -- can we use the
@@ -830,95 +889,95 @@
 // use an out-of-place one with temporary buffer and still be faster
 // than doing it any other way?
 template<>
-inline void v_sqrt(float *const BQ_R__ dst,
+inline void v_sqrt(float *const BQ_R__ srcdst,
                    const int count)
 {
     float tmp[count];
-    vvsqrtf(tmp, dst, &count);
-    v_copy(dst, tmp, count);
+    vvsqrtf(tmp, srcdst, &count);
+    v_copy(srcdst, tmp, count);
 }
 template<>
-inline void v_sqrt(double *const BQ_R__ dst,
+inline void v_sqrt(double *const BQ_R__ srcdst,
                    const int count)
 {
     double tmp[count];
-    vvsqrt(tmp, dst, &count);
-    v_copy(dst, tmp, count);
+    vvsqrt(tmp, srcdst, &count);
+    v_copy(srcdst, tmp, count);
 }
-#endif
+#endif // HAVE_VDSP
 
 /**
  * v_square
  *
- * Replace each element in vector \arg dst, of length \arg count, with
+ * Replace each element in vector \arg srcdst, of length \arg count, with
  * its square.
  */
 template<typename T>
-inline void v_square(T *const BQ_R__ dst,
+inline void v_square(T *const BQ_R__ srcdst,
                    const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] = dst[i] * dst[i];
+        srcdst[i] = srcdst[i] * srcdst[i];
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_square(float *const BQ_R__ dst,
+inline void v_square(float *const BQ_R__ srcdst,
                    const int count)
 {
-    ippsSqr_32f_I(dst, count);
+    ippsSqr_32f_I(srcdst, count);
 }
 template<>
-inline void v_square(double *const BQ_R__ dst,
+inline void v_square(double *const BQ_R__ srcdst,
                    const int count)
 {
-    ippsSqr_64f_I(dst, count);
+    ippsSqr_64f_I(srcdst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 /**
  * v_abs
  *
- * Replace each element in vector \arg dst, of length \arg count, with
+ * Replace each element in vector \arg srcdst, of length \arg count, with
  * its absolute value.
  */
 template<typename T>
-inline void v_abs(T *const BQ_R__ dst,
+inline void v_abs(T *const BQ_R__ srcdst,
                   const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] = fabs(dst[i]);
+        srcdst[i] = fabs(srcdst[i]);
     }
 }
 
 #if defined HAVE_IPP
 template<>
-inline void v_abs(float *const BQ_R__ dst,
+inline void v_abs(float *const BQ_R__ srcdst,
                   const int count)
 {
-    ippsAbs_32f_I(dst, count);
+    ippsAbs_32f_I(srcdst, count);
 }
 template<>
-inline void v_abs(double *const BQ_R__ dst,
+inline void v_abs(double *const BQ_R__ srcdst,
                   const int count)
 {
-    ippsAbs_64f_I(dst, count);
+    ippsAbs_64f_I(srcdst, count);
 }
 #elif defined HAVE_VDSP
 template<>
-inline void v_abs(float *const BQ_R__ dst,
+inline void v_abs(float *const BQ_R__ srcdst,
                   const int count)
 {
     float tmp[count];
 #if (defined(MACOSX_DEPLOYMENT_TARGET) && MACOSX_DEPLOYMENT_TARGET <= 1070 && MAC_OS_X_VERSION_MIN_REQUIRED <= 1070)
-    vvfabf(tmp, dst, &count);
+    vvfabf(tmp, srcdst, &count);
 #else
-    vvfabsf(tmp, dst, &count);
+    vvfabsf(tmp, srcdst, &count);
 #endif
-    v_copy(dst, tmp, count);
+    v_copy(srcdst, tmp, count);
 }
-#endif
+#endif // HAVE_VDSP
 
 /**
  * v_interleave
@@ -959,6 +1018,8 @@
 }
 
 #if defined HAVE_IPP 
+#if (IPP_VERSION_MAJOR <= 7)
+// Deprecated in v8, removed in v9
 template<>
 inline void v_interleave(float *const BQ_R__ dst,
                          const float *const BQ_R__ *const BQ_R__ src,
@@ -969,6 +1030,7 @@
 }
 // IPP does not (currently?) provide double-precision interleave
 #endif
+#endif // HAVE_IPP
 
 /**
  * v_deinterleave
@@ -1009,6 +1071,8 @@
 }
 
 #if defined HAVE_IPP
+#if (IPP_VERSION_MAJOR <= 7)
+// Deprecated in v8, removed in v9
 template<>
 inline void v_deinterleave(float *const BQ_R__ *const BQ_R__ dst,
                            const float *const BQ_R__ src,
@@ -1019,6 +1083,7 @@
 }
 // IPP does not (currently?) provide double-precision deinterleave
 #endif
+#endif // HAVE_IPP
 
 /**
  * v_fftshift
@@ -1043,12 +1108,13 @@
  * v_mean
  *
  * Return the mean of the values in the vector \arg vec, of length
- * \arg count.
+ * \arg count. For an empty vector, return 0.0.
  */
 template<typename T>
 inline T v_mean(const T *const BQ_R__ vec, const int count)
 {
     T t = T(0);
+    if (count == 0) return t;
     for (int i = 0; i < count; ++i) {
         t += vec[i];
     }
@@ -1076,6 +1142,144 @@
     return t;
 }
 
+/**
+ * v_mix
+ *
+ * Mix down N channels to 1 channel by simple averaging.
+ *
+ * Add together the corresponding elements of the vectors in the set
+ * \arg in, each multiplied by a constant factor 1 / \arg channels, and
+ * leave the results in \arg out. All vectors have length \arg count,
+ * and there are \arg channels vectors in the input set.
+ *
+ * Caller guarantees that all of the \arg in and \arg out vectors are
+ * non-overlapping with each other.
+ */
+template <typename T>
+inline void v_mix(T *const BQ_R__ out,
+                  const T *const BQ_R__ *const BQ_R__ in,
+                  const int channels,
+                  const int count)
+{
+    v_zero(out, count);
+    for (int c = 0; c < channels; ++c) {
+        v_add(out, in[c], count);
+    }
+    v_scale(out, T(1.0) / T(channels), count);
+}
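+
+// An illustrative usage sketch (editor's addition, not library code):
+// average a stereo pair down to mono.
+//
+//     float left[3]  = { 1.f, 2.f, 3.f };
+//     float right[3] = { 3.f, 4.f, 5.f };
+//     const float *in[2] = { left, right };
+//     float mono[3];
+//     v_mix(mono, in, 2, 3);    // mono is now { 2.f, 3.f, 4.f }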
+
+/**
+ * v_reconfigure_channels
+ *
+ * Adapt a fixed number of frames from n to m channels.
+ * 
+ * We vaguely assume these channel ordering conventions (from
+ * Vorbis/Opus docs):
+ *
+ * 1 channel = mono
+ * 2 = stereo
+ * 3 = l/c/r
+ * 4 = fl/fr/rl/rr
+ * 5 = fl/c/fr/rl/rr
+ * 6 (= 5.1) = fl/c/fr/rl/rr/lfe
+ * 7 (= 6.1) = fl/c/fr/sl/sr/rc/lfe
+ * 8 (= 7.1) = fl/c/fr/sl/sr/rl/rr/lfe
+ * >8 = application-specific
+ *
+ * (where l = left, c = centre, r = right, fl/fr = front l/r; sl/sr =
+ * side l/r; rl/rr = rear l/r; lfe = low-frequency effects channel)
+ *
+ * The reconfiguration rules are:
+ * -- if n == m, copy the input through unchanged
+ * -- else if m == 1, mixdown to mono by averaging all n input channels
+ * -- else if n == 1, duplicate the mono input across all m output channels
+ * -- else if m == 2 and (n == 3 or n >= 5), take 1st and 3rd channels of input
+ * -- else if n == 2 and (m == 3 or m >= 5), copy to 1st and 3rd, leaving the rest silent
+ * -- else if n > m, take the first m channels of the input
+ * -- else take all n channels of the input and add m-n silent channels
+ * 
+ * The aim here is to be simple and not unreasonable: to reconfigure,
+ * not to convert properly. A proper conversion would involve more
+ * application-specific decisions.
+ */
+template<typename T>
+inline void v_reconfigure_channels(T *const BQ_R__ *const BQ_R__ out,
+                                   const int m, /* out channel count */
+                                   const T *const BQ_R__ *const BQ_R__ in,
+                                   const int n, /* in channel count */
+                                   const int count)
+{
+    if (n == m) {
+        v_copy_channels(out, in, n, count);
+    } else if (m == 1) {
+        v_mix(out[0], in, n, count);
+    } else if (n == 1) {
+        for (int c = 0; c < m; ++c) {
+            v_copy(out[c], in[0], count);
+        }
+    } else if (m == 2 && (n == 3 || n >= 5)) {
+        v_copy(out[0], in[0], count);
+        v_copy(out[1], in[2], count);
+    } else if (n == 2 && (m == 3 || m >= 5)) {
+        v_copy(out[0], in[0], count);
+        v_zero(out[1], count);
+        v_copy(out[2], in[1], count);
+        for (int c = 3; c < m; ++c) {
+            v_zero(out[c], count);
+        }
+    } else {
+        int c = 0;
+        while (c < n && c < m) {
+            v_copy(out[c], in[c], count);
+            ++c;
+        }
+        while (c < m) {
+            v_zero(out[c], count);
+            ++c;
+        }
+    }
+}
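+
+// An illustrative usage sketch (editor's addition, not library code):
+// duplicating mono input across two output channels, per the n == 1
+// rule above.
+//
+//     float in0[3] = { 1.f, 2.f, 3.f };
+//     const float *in[1] = { in0 };
+//     float out0[3], out1[3];
+//     float *out[2] = { out0, out1 };
+//     v_reconfigure_channels(out, 2, in, 1, 3);
+//     // out0 and out1 both now contain { 1.f, 2.f, 3.f }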
+
+/**
+ * v_reconfigure_channels_inplace
+ *
+ * Convert a fixed number of frames from n to m channels, operating
+ * in-place. That is, the input and output buffer arrays are the same,
+ * having space for max(n, m) channel arrays, and we read n channels
+ * from them and write back m.
+ *
+ * The rules are:
+ * -- if n >= m and m > 1, leave unchanged
+ * -- else if m == 1, mixdown to mono by averaging all n channels
+ * -- else if n == 1, duplicate the mono input across all m channels
+ * -- else leave first n channels and add m-n silent channels
+ */
+template<typename T>
+inline void v_reconfigure_channels_inplace(T *const BQ_R__ *const BQ_R__ inout,
+                                           const int m, /* out channel count */
+                                           const int n, /* in channel count */
+                                           const int count)
+{
+    if (n >= m && m > 1) {
+        return;
+    } else if (m == 1) {
+        for (int c = 1; c < n; ++c) {
+            v_add(inout[0], inout[c], count);
+        }
+        v_scale(inout[0], T(1.0) / T(n), count);
+    } else if (n == 1) {
+        for (int c = 1; c < m; ++c) {
+            v_copy(inout[c], inout[0], count);
+        }
+    } else {
+        int c = n;
+        while (c < m) {
+            v_zero(inout[c], count);
+            ++c;
+        }
+    }
+}
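+
+// An illustrative usage sketch (editor's addition, not library code):
+// in-place stereo-to-mono mixdown, per the m == 1 rule above.
+//
+//     float c0[2] = { 1.f, 3.f };
+//     float c1[2] = { 3.f, 5.f };
+//     float *chans[2] = { c0, c1 };
+//     v_reconfigure_channels_inplace(chans, 1, 2, 2);
+//     // chans[0] is now { 2.f, 4.f }; chans[1] is no longer meaningful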
+
 }
 
 #endif
--- a/bqvec/bqvec/VectorOpsComplex.h	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/bqvec/VectorOpsComplex.h	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2015 Particular Programs Ltd.
+    Copyright 2007-2016 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -61,7 +61,7 @@
     } else {
         vDSP_vclrD((double *)ptr, 1, count * 2);
     }
-#else
+#else // ! HAVE_VDSP
     const bq_complex_element_t value = 0.0;
     for (int i = 0; i < count; ++i) {
         ptr[i].re = value;
@@ -93,7 +93,7 @@
         ippsMove_64fc((const Ipp64fc *)src, (Ipp64fc *)dst, count);
     }
 }
-#endif
+#endif // HAVE_IPP
 
 template<>
 inline void v_convert(bq_complex_t *const BQ_R__ dst,
@@ -120,19 +120,19 @@
     }
 }
 
-inline void c_add(bq_complex_t &dst,
+inline void c_add(bq_complex_t &srcdst,
                   const bq_complex_t &src)
 {
-    dst.re += src.re;
-    dst.im += src.im;
+    srcdst.re += src.re;
+    srcdst.im += src.im;
 }
 
-inline void c_add_with_gain(bq_complex_t &dst,
+inline void c_add_with_gain(bq_complex_t &srcdst,
                             const bq_complex_t &src,
                             const bq_complex_element_t gain)
 {
-    dst.re += src.re * gain;
-    dst.im += src.im * gain;
+    srcdst.re += src.re * gain;
+    srcdst.im += src.im * gain;
 }
 
 inline void c_multiply(bq_complex_t &dst,
@@ -159,75 +159,75 @@
     dst.im = imag;
 }
 
-inline void c_multiply(bq_complex_t &dst,
+inline void c_multiply(bq_complex_t &srcdst,
                        const bq_complex_t &src)
 {
-    c_multiply(dst, dst, src);
+    c_multiply(srcdst, srcdst, src);
 }
 
-inline void c_multiply_and_add(bq_complex_t &dst,
+inline void c_multiply_and_add(bq_complex_t &srcdst,
                                const bq_complex_t &src1,
                                const bq_complex_t &src2)
 {
     bq_complex_t tmp;
     c_multiply(tmp, src1, src2);
-    c_add(dst, tmp);
+    c_add(srcdst, tmp);
 }
 
 template<>
-inline void v_add(bq_complex_t *const BQ_R__ dst,
+inline void v_add(bq_complex_t *const BQ_R__ srcdst,
                   const bq_complex_t *const BQ_R__ src,
                   const int count)
 {
 #if defined HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
-        ippsAdd_32fc_I((Ipp32fc *)src, (Ipp32fc *)dst, count);
+        ippsAdd_32fc_I((Ipp32fc *)src, (Ipp32fc *)srcdst, count);
     } else {
-        ippsAdd_64fc_I((Ipp64fc *)src, (Ipp64fc *)dst, count);
+        ippsAdd_64fc_I((Ipp64fc *)src, (Ipp64fc *)srcdst, count);
     }
 #else
     for (int i = 0; i < count; ++i) {
-        dst[i].re += src[i].re;
-        dst[i].im += src[i].im;
+        srcdst[i].re += src[i].re;
+        srcdst[i].im += src[i].im;
     }
-#endif
+#endif // HAVE_IPP
 }    
 
 template<>
-inline void v_add_with_gain(bq_complex_t *const BQ_R__ dst,
+inline void v_add_with_gain(bq_complex_t *const BQ_R__ srcdst,
                             const bq_complex_t *const BQ_R__ src,
                             const bq_complex_element_t gain,
                             const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i].re += src[i].re * gain;
-        dst[i].im += src[i].im * gain;
+        srcdst[i].re += src[i].re * gain;
+        srcdst[i].im += src[i].im * gain;
     }
 }
 
 template<>
-inline void v_multiply(bq_complex_t *const BQ_R__ dst,
+inline void v_multiply(bq_complex_t *const BQ_R__ srcdst,
                        const bq_complex_t *const BQ_R__ src,
                        const int count)
 {
 #ifdef HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
-        ippsMul_32fc_I((const Ipp32fc *)src, (Ipp32fc *)dst, count);
+        ippsMul_32fc_I((const Ipp32fc *)src, (Ipp32fc *)srcdst, count);
     } else {
-        ippsMul_64fc_I((const Ipp64fc *)src, (Ipp64fc *)dst, count);
+        ippsMul_64fc_I((const Ipp64fc *)src, (Ipp64fc *)srcdst, count);
     }
 #else
     for (int i = 0; i < count; ++i) {
-        c_multiply(dst[i], src[i]);
+        c_multiply(srcdst[i], src[i]);
     }
-#endif
+#endif // HAVE_IPP
 }
 
 template<>
-inline void v_multiply(bq_complex_t *const BQ_R__ dst,
-                       const bq_complex_t *const BQ_R__ src1,
-                       const bq_complex_t *const BQ_R__ src2,
-                       const int count)
+inline void v_multiply_to(bq_complex_t *const BQ_R__ dst,
+                          const bq_complex_t *const BQ_R__ src1,
+                          const bq_complex_t *const BQ_R__ src2,
+                          const int count)
 {
 #ifdef HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
@@ -241,11 +241,11 @@
     for (int i = 0; i < count; ++i) {
         c_multiply(dst[i], src1[i], src2[i]);
     }
-#endif
+#endif // HAVE_IPP
 }
 
 template<>
-inline void v_multiply_and_add(bq_complex_t *const BQ_R__ dst,
+inline void v_multiply_and_add(bq_complex_t *const BQ_R__ srcdst,
                                const bq_complex_t *const BQ_R__ src1,
                                const bq_complex_t *const BQ_R__ src2,
                                const int count)
@@ -253,16 +253,16 @@
 #ifdef HAVE_IPP
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
         ippsAddProduct_32fc((const Ipp32fc *)src1, (const Ipp32fc *)src2,
-                            (Ipp32fc *)dst, count);
+                            (Ipp32fc *)srcdst, count);
     } else {
         ippsAddProduct_64fc((const Ipp64fc *)src1, (const Ipp64fc *)src2,
-                            (Ipp64fc *)dst, count);
+                            (Ipp64fc *)srcdst, count);
     }
 #else
     for (int i = 0; i < count; ++i) {
-        c_multiply_and_add(dst[i], src1[i], src2[i]);
+        c_multiply_and_add(srcdst[i], src1[i], src2[i]);
     }
-#endif
+#endif // HAVE_IPP
 }
 
 #if defined( __GNUC__ ) && defined( _WIN32 )
@@ -279,7 +279,7 @@
 }
 #endif
 
-#endif /* !NO_COMPLEX_TYPES */
+#endif // !NO_COMPLEX_TYPES
 
 template<typename T>
 inline void c_phasor(T *real, T *imag, T phase)
@@ -302,8 +302,12 @@
         *imag = sin(phase);
     }
 #elif defined __GNUC__
+#if defined __APPLE__
+#define sincos __sincos
+#define sincosf __sincosf
+#endif
     if (sizeof(T) == sizeof(float)) {
-        sincosf(phase, (float *)imag, (float *)real);
+        sincosf(float(phase), (float *)imag, (float *)real);
     } else {
         sincos(phase, (double *)imag, (double *)real);
     }
@@ -335,7 +339,7 @@
     *phase = atan;
     *mag = sqrtf(real * real + imag * imag);
 }
-#else
+#else // !USE_APPROXIMATE_ATAN2
 template<>
 inline void c_magphase(float *mag, float *phase, float real, float imag)
 {
@@ -368,15 +372,10 @@
                                       const bq_complex_element_t *const BQ_R__ src,
                                       const int count);
 
-inline void v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag,
-                                 bq_complex_element_t *const BQ_R__ phase,
-                                 const bq_complex_t *const BQ_R__ src,
-                                 const int count)
-{
-    for (int i = 0; i < count; ++i) {
-        c_magphase<bq_complex_element_t>(mag + i, phase + i, src[i].re, src[i].im);
-    }
-}
+void v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag,
+                          bq_complex_element_t *const BQ_R__ phase,
+                          const bq_complex_t *const BQ_R__ src,
+                          const int count);
 
 inline void v_cartesian_to_polar_interleaved(bq_complex_element_t *const BQ_R__ dst,
                                              const bq_complex_t *const BQ_R__ src,
@@ -384,11 +383,15 @@
 {
     for (int i = 0; i < count; ++i) {
         c_magphase<bq_complex_element_t>(&dst[i*2], &dst[i*2+1],
-                                    src[i].re, src[i].im);
+                                         src[i].re, src[i].im);
     }
 }
 
-#endif /* !NO_COMPLEX_TYPES */
+void v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag,
+                               const bq_complex_t *const BQ_R__ src,
+                               const int count);
+
+#endif // !NO_COMPLEX_TYPES
 
 template<typename S, typename T> // S source, T target
 void v_polar_to_cartesian(T *const BQ_R__ real,
@@ -434,7 +437,47 @@
     }
 }    
 
-#if defined USE_POMMIER_MATHFUN
+#ifdef HAVE_IPP
+template<>
+inline void v_polar_to_cartesian(float *const BQ_R__ real,
+                                 float *const BQ_R__ imag,
+                                 const float *const BQ_R__ mag,
+                                 const float *const BQ_R__ phase,
+                                 const int count)
+{
+    ippsPolarToCart_32f(mag, phase, real, imag, count);
+}    
+    
+template<>
+inline void v_polar_to_cartesian(double *const BQ_R__ real,
+                                 double *const BQ_R__ imag,
+                                 const double *const BQ_R__ mag,
+                                 const double *const BQ_R__ phase,
+                                 const int count)
+{
+    ippsPolarToCart_64f(mag, phase, real, imag, count);
+}    
+    
+template<>
+inline void v_polar_to_cartesian_interleaved(float *const BQ_R__ dst,
+                                             const float *const BQ_R__ mag,
+                                             const float *const BQ_R__ phase,
+                                             const int count)
+{
+    ippsPolarToCart_32fc(mag, phase, (Ipp32fc *)dst, count);
+}    
+
+template<>
+inline void v_polar_to_cartesian_interleaved(double *const BQ_R__ dst,
+                                             const double *const BQ_R__ mag,
+                                             const double *const BQ_R__ phase,
+                                             const int count)
+{
+    ippsPolarToCart_64fc(mag, phase, (Ipp64fc *)dst, count);
+}
+
+#elif defined USE_POMMIER_MATHFUN
+
 void v_polar_to_cartesian_pommier(float *const BQ_R__ real,
                                   float *const BQ_R__ imag,
                                   const float *const BQ_R__ mag,
@@ -472,8 +515,7 @@
 {
     v_polar_to_cartesian_interleaved_pommier(dst, mag, phase, count);
 }
-
-#endif
+#endif // USE_POMMIER_MATHFUN
 
 template<typename S, typename T> // S source, T target
 void v_cartesian_to_polar(T *const BQ_R__ mag,
@@ -498,7 +540,48 @@
     }
 }
 
-#ifdef HAVE_VDSP
+#ifdef HAVE_IPP
+
+template<>
+inline void v_cartesian_to_polar(float *const BQ_R__ mag,
+                                 float *const BQ_R__ phase,
+                                 const float *const BQ_R__ real,
+                                 const float *const BQ_R__ imag,
+                                 const int count)
+{
+    ippsCartToPolar_32f(real, imag, mag, phase, count);
+}
+
+template<>
+inline void v_cartesian_interleaved_to_polar(float *const BQ_R__ mag,
+                                             float *const BQ_R__ phase,
+                                             const float *const BQ_R__ src,
+                                             const int count)
+{
+    ippsCartToPolar_32fc((const Ipp32fc *)src, mag, phase, count);
+}
+
+template<>
+inline void v_cartesian_to_polar(double *const BQ_R__ mag,
+                                 double *const BQ_R__ phase,
+                                 const double *const BQ_R__ real,
+                                 const double *const BQ_R__ imag,
+                                 const int count)
+{
+    ippsCartToPolar_64f(real, imag, mag, phase, count);
+}
+
+template<>
+inline void v_cartesian_interleaved_to_polar(double *const BQ_R__ mag,
+                                             double *const BQ_R__ phase,
+                                             const double *const BQ_R__ src,
+                                             const int count)
+{
+    ippsCartToPolar_64fc((const Ipp64fc *)src, mag, phase, count);
+}
+
+#elif defined HAVE_VDSP
+
 template<>
 inline void v_cartesian_to_polar(float *const BQ_R__ mag,
                                  float *const BQ_R__ phase,
@@ -513,6 +596,7 @@
     vvsqrtf(mag, phase, &count); // using phase as the source
     vvatan2f(phase, imag, real, &count);
 }
+
 template<>
 inline void v_cartesian_to_polar(double *const BQ_R__ mag,
                                  double *const BQ_R__ phase,
@@ -528,7 +612,7 @@
     vvsqrt(mag, phase, &count); // using phase as the source
     vvatan2(phase, imag, real, &count);
 }
-#endif
+#endif // HAVE_VDSP
 
 template<typename T>
 void v_cartesian_to_polar_interleaved_inplace(T *const BQ_R__ srcdst,
@@ -542,6 +626,63 @@
     }
 }
 
+template<typename S, typename T> // S source, T target
+void v_cartesian_to_magnitudes(T *const BQ_R__ mag,
+                               const S *const BQ_R__ real,
+                               const S *const BQ_R__ imag,
+                               const int count)
+{
+    for (int i = 0; i < count; ++i) {
+        mag[i] = T(sqrt(real[i] * real[i] + imag[i] * imag[i]));
+    }
+}
+
+template<typename S, typename T> // S source, T target
+void v_cartesian_interleaved_to_magnitudes(T *const BQ_R__ mag,
+                                           const S *const BQ_R__ src,
+                                           const int count)
+{
+    for (int i = 0; i < count; ++i) {
+        mag[i] = T(sqrt(src[i*2] * src[i*2] + src[i*2+1] * src[i*2+1]));
+    }
+}
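+
+// An illustrative usage sketch (editor's addition, not library code):
+// magnitudes from an interleaved (re, im) buffer.
+//
+//     double cplx[4] = { 3.0, 4.0, 0.0, 1.0 };    // 3+4i, 0+1i
+//     double mag[2];
+//     v_cartesian_interleaved_to_magnitudes(mag, cplx, 2);
+//     // mag is now { 5.0, 1.0 }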
+
+#ifdef HAVE_IPP
+template<>
+inline void v_cartesian_to_magnitudes(float *const BQ_R__ mag,
+                                      const float *const BQ_R__ real,
+                                      const float *const BQ_R__ imag,
+                                      const int count)
+{
+    ippsMagnitude_32f(real, imag, mag, count);
+}
+
+template<>
+inline void v_cartesian_to_magnitudes(double *const BQ_R__ mag,
+                                      const double *const BQ_R__ real,
+                                      const double *const BQ_R__ imag,
+                                      const int count)
+{
+    ippsMagnitude_64f(real, imag, mag, count);
+}
+
+template<>
+inline void v_cartesian_interleaved_to_magnitudes(float *const BQ_R__ mag,
+                                                  const float *const BQ_R__ src,
+                                                  const int count)
+{
+    ippsMagnitude_32fc((const Ipp32fc *)src, mag, count);
+}
+
+template<>
+inline void v_cartesian_interleaved_to_magnitudes(double *const BQ_R__ mag,
+                                                  const double *const BQ_R__ src,
+                                                  const int count)
+{
+    ippsMagnitude_64fc((const Ipp64fc *)src, mag, count);
+}
+#endif // HAVE_IPP
+
 }
 
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/build/Makefile.inc	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,74 @@
+
+SRC_DIR	:= src
+TEST_DIR := test
+HEADER_DIR := bqvec
+
+SOURCES	:= $(wildcard $(SRC_DIR)/*.cpp)
+HEADERS	:= $(wildcard $(HEADER_DIR)/*.h) $(wildcard $(SRC_DIR)/*.h)
+
+OBJECTS	:= $(SOURCES:.cpp=.o)
+OBJECTS	:= $(OBJECTS:.c=.o)
+
+TIMINGS_SOURCES	:= $(TEST_DIR)/Timings.cpp
+TIMINGS_OBJECTS	:= $(TIMINGS_SOURCES:.cpp=.o)
+
+TEST_SOURCES	:= $(wildcard $(TEST_DIR)/*.cpp)
+TEST_OBJECTS	:= $(TEST_SOURCES:.cpp=.o)
+
+OPTFLAGS := -O3 -ffast-math
+
+CXXFLAGS := -std=c++98 -fpic -Wall -Wextra -Werror $(VECTOR_DEFINES) $(ALLOCATOR_DEFINES) -I. $(THIRD_PARTY_INCLUDES) -I$(HEADER_DIR) $(OPTFLAGS)
+
+LIBRARY	:= libbqvec.a
+
+all:	$(LIBRARY) timings
+
+test:	$(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex
+	./test-allocators && ./test-vectorops && ./test-vectorops-complex
+
+valgrind:	$(LIBRARY) timings test-allocators test-vectorops test-vectorops-complex
+	valgrind ./test-allocators && valgrind ./test-vectorops && valgrind ./test-vectorops-complex
+
+$(LIBRARY):	$(OBJECTS)
+	$(AR) rc $@ $^
+
+timings: $(TIMINGS_OBJECTS) $(LIBRARY)
+	$(CXX) $(CXXFLAGS) -o $@ $^ $(THIRD_PARTY_LIBS)
+
+test-allocators:	test/TestAllocators.o $(LIBRARY)
+	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+
+test-vectorops:	test/TestVectorOps.o $(LIBRARY)
+	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+
+test-vectorops-complex:	test/TestVectorOpsComplex.o $(LIBRARY)
+	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework -L. -lbqvec $(THIRD_PARTY_LIBS)
+
+clean:		
+	rm -f $(OBJECTS) $(TEST_OBJECTS) $(TIMINGS_OBJECTS)
+
+distclean:	clean
+	rm -f $(LIBRARY) test-allocators test-vectorops test-vectorops-complex
+
+depend:
+	makedepend -Y -fbuild/Makefile.inc $(TIMINGS_SOURCES) $(TEST_SOURCES) $(HEADERS) $(SOURCES)
+
+
+# DO NOT DELETE
+
+test/Timings.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h bqvec/Restrict.h
+test/Timings.o: bqvec/ComplexTypes.h
+test/TestVectorOpsComplex.o: bqvec/VectorOpsComplex.h bqvec/VectorOps.h
+test/TestVectorOpsComplex.o: bqvec/Restrict.h bqvec/ComplexTypes.h
+test/TestVectorOpsComplex.o: bqvec/VectorOps.h
+test/TestVectorOps.o: bqvec/VectorOps.h
+test/TestAllocators.o: bqvec/VectorOps.h bqvec/Allocators.h
+bqvec/RingBuffer.o: bqvec/Barrier.h bqvec/Allocators.h bqvec/Restrict.h
+bqvec/RingBuffer.o: bqvec/VectorOps.h
+bqvec/VectorOpsComplex.o: bqvec/VectorOps.h bqvec/Restrict.h
+bqvec/VectorOpsComplex.o: bqvec/ComplexTypes.h
+bqvec/VectorOps.o: bqvec/Restrict.h
+src/Allocators.o: bqvec/Allocators.h
+src/Barrier.o: bqvec/Barrier.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/build/Makefile.linux	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,9 @@
+
+VECTOR_DEFINES		:= 
+
+THIRD_PARTY_INCLUDES	:= 
+THIRD_PARTY_LIBS	:= 
+
+ALLOCATOR_DEFINES 	:= -DHAVE_POSIX_MEMALIGN
+
+include build/Makefile.inc
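+
+# Editor's note: this is intended to be run from the bqvec root, e.g.
+# "make -f build/Makefile.linux test", which is how
+# build/run-platform-tests.sh invokes it.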
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/build/Makefile.linux.ipp	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,10 @@
+
+VECTOR_DEFINES		:= -DHAVE_IPP
+
+ALLOCATOR_DEFINES 	:= -DHAVE_POSIX_MEMALIGN
+
+THIRD_PARTY_INCLUDES	:= -I/opt/intel/ipp/include
+THIRD_PARTY_LIBS	:= -L/opt/intel/ipp/lib/intel64_lin -Wl,-Bstatic -lipps -lippvm -lippcore -Wl,-Bdynamic
+
+include build/Makefile.inc
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/build/Makefile.linux.min	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,13 @@
+
+# This Makefile resembles the sort of settings one might use on a
+# small device without library support. It's mainly here as another CI
+# test target that is quite different from the default configuration
+
+VECTOR_DEFINES		:= -DNO_EXCEPTIONS -DUSE_SINGLE_PRECISION_COMPLEX -DUSE_POMMIER_MATHFUN -DUSE_APPROXIMATE_ATAN2
+
+ALLOCATOR_DEFINES 	:= -DUSE_OWN_ALIGNED_MALLOC -DLACK_POSIX_MEMALIGN -DMALLOC_IS_NOT_ALIGNED
+
+THIRD_PARTY_INCLUDES	:= 
+THIRD_PARTY_LIBS	:= 
+
+include build/Makefile.inc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/build/Makefile.osx	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,9 @@
+
+VECTOR_DEFINES		:= -DHAVE_VDSP
+
+ALLOCATOR_DEFINES 	:= -DMALLOC_IS_ALIGNED
+
+THIRD_PARTY_INCLUDES	:=
+THIRD_PARTY_LIBS	:= -framework Accelerate
+
+include build/Makefile.inc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/build/run-platform-tests.sh	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+if [ -z "$1" ]; then
+    echo "Usage: $0 <platformtag>"
+    exit 2
+fi
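+
+# Editor's note: a typical invocation from the bqvec root is e.g.
+# "./build/run-platform-tests.sh linux", which picks up build/Makefile.linux
+# and build/Makefile.linux.* above.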
+
+platformtag="$1"
+
+set -eu
+
+ippdir=/opt/intel/ipp
+
+echo
+if [ -d "$ippdir" ]; then
+    echo "Found IPP directory $ippdir, considering IPP as well as other options"
+else
+    echo "No IPP directory $ippdir, not testing with IPP"
+fi
+
+if valgrind --version >/dev/null 2>&1 ;
+then
+    have_valgrind=yes
+else
+    echo
+    echo "No valgrind executable found, not using valgrind"
+    have_valgrind=no
+fi
+
+tmpfile=$(mktemp "/tmp/test_XXXXXX")
+trap "rm -f $tmpfile" 0
+
+run() {
+    successtext="$1"
+    shift
+    echo -n "Running \"$@\"..."
+    if "$@" > "$tmpfile" 2>&1 ; then
+	if [ -z "$successtext" ] || fgrep -q "$successtext" "$tmpfile" ; then
+	    echo " OK"
+	    return 0
+	else
+	    echo " Failed"
+	    cat "$tmpfile"
+	    return 1
+	fi
+    else
+	echo " Failed"
+	cat "$tmpfile"
+	return 1
+    fi
+}
+
+for mf in Makefile build/Makefile.$platformtag build/Makefile.$platformtag.* ; do
+
+    case "$mf" in
+	*~) continue;;
+	*.bak) continue;;
+	*ipp)
+	    if [ ! -d "$ippdir" ]; then
+		continue
+	    fi;;
+    esac
+
+    if [ ! -f "$mf" ]; then
+	continue
+    fi
+    
+    echo
+    echo "Building and testing with $mf:"
+    echo
+    
+    make -f "$mf" clean >/dev/null
+    run "No errors detected" make -f "$mf" test
+
+    if [ "$have_valgrind" = "yes" ]; then
+	for t in test-* ; do
+	    if [ -x "$t" ]; then
+		run "no leaks are possible" valgrind --leak-check=full ./"$t"
+	    fi
+	done
+    fi
+done
+
+
--- a/bqvec/pommier/sse_mathfun.h	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/pommier/sse_mathfun.h	Tue Nov 19 10:13:32 2019 +0000
@@ -61,7 +61,7 @@
 #define _PI32_CONST(Name, Val)                                            \
   static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
 #define _PS_CONST_TYPE(Name, Type, Val)                                 \
-  static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+    static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { (Type)Val, (Type)Val, (Type)Val, (Type)Val }
 
 _PS_CONST(1  , 1.0f);
 _PS_CONST(0p5, 0.5f);
--- a/bqvec/src/Allocators.cpp	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/src/Allocators.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2014 Particular Programs Ltd.
+    Copyright 2007-2016 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -33,13 +33,15 @@
     Software without prior written authorization.
 */
 
-#include "bqvec/Allocators.h"
+#include "Allocators.h"
 
 #ifdef HAVE_IPP
 #include <ipps.h>
 #endif
 
 #include <iostream>
+#include <climits>
+
 using std::cerr;
 using std::endl;
 
@@ -50,16 +52,52 @@
 template <>
 float *allocate(size_t count)
 {
-    float *ptr = ippsMalloc_32f(count);
-    if (!ptr) throw (std::bad_alloc());
+    if (count > INT_MAX) {
+#ifndef NO_EXCEPTIONS
+        throw std::length_error("Size overflow in allocate");
+#else
+        abort();
+#endif
+    }
+    
+    float *ptr = ippsMalloc_32f(int(count));
+    if (!ptr) {
+#ifndef NO_EXCEPTIONS
+        throw (std::bad_alloc());
+#else
+        abort();
+#endif
+    }
+
+    for (size_t i = 0; i < count; ++i) {
+        new (ptr + i) float;
+    }
     return ptr;
 }
 
 template <>
 double *allocate(size_t count)
 {
-    double *ptr = ippsMalloc_64f(count);
-    if (!ptr) throw (std::bad_alloc());
+    if (count > INT_MAX) {
+#ifndef NO_EXCEPTIONS
+        throw std::length_error("Size overflow in allocate");
+#else
+        abort();
+#endif
+    }
+    
+    double *ptr = ippsMalloc_64f(int(count));
+    if (!ptr) {
+#ifndef NO_EXCEPTIONS
+        throw (std::bad_alloc());
+#else
+        abort();
+#endif
+    }
+
+    for (size_t i = 0; i < count; ++i) {
+        new (ptr + i) double;
+    }
     return ptr;
 }
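+
+// An illustrative usage sketch (editor's addition, not library code):
+//
+//     double *buf = breakfastquay::allocate<double>(1024);
+//     // ... use buf as an aligned array of 1024 doubles ...
+//     breakfastquay::deallocate(buf);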
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/src/Barrier.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,80 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    bqvec
+
+    A small library for vector arithmetic and allocation in C++ using
+    raw C pointer arrays.
+
+    Copyright 2007-2018 Particular Programs Ltd.
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use, copy,
+    modify, merge, publish, distribute, sublicense, and/or sell copies
+    of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+    Except as contained in this notice, the names of Chris Cannam and
+    Particular Programs Ltd shall not be used in advertising or
+    otherwise to promote the sale, use or other dealings in this
+    Software without prior written authorization.
+*/
+
+#include "Barrier.h"
+
+#if defined __APPLE__
+#if !defined __MAC_10_12
+#include <libkern/OSAtomic.h>
+#endif
+#endif
+#if defined _WIN32 && defined _MSC_VER
+#include <Windows.h>
+#endif
+
+namespace breakfastquay {
+
+void system_memorybarrier()
+{
+#if defined __APPLE__
+
+#if defined __MAC_10_12
+    __sync_synchronize();
+#else
+    OSMemoryBarrier();
+#endif
+    
+#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+
+    __sync_synchronize();
+
+#elif defined _WIN32 
+
+#if defined _MSC_VER
+    MemoryBarrier();
+#else /* (mingw) */
+    LONG Barrier = 0;
+    __asm__ __volatile__("xchgl %%eax,%0 "
+                         : "=r" (Barrier));
+#endif
+
+#else
+#warning "No memory barrier defined"
+#endif
+    
+}
+
+}
+
--- a/bqvec/src/VectorOpsComplex.cpp	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/src/VectorOpsComplex.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -6,7 +6,7 @@
     A small library for vector arithmetic and allocation in C++ using
     raw C pointer arrays.
 
-    Copyright 2007-2015 Particular Programs Ltd.
+    Copyright 2007-2016 Particular Programs Ltd.
 
     Permission is hereby granted, free of charge, to any person
     obtaining a copy of this software and associated documentation
@@ -33,15 +33,10 @@
     Software without prior written authorization.
 */
 
-#include "bqvec/VectorOpsComplex.h"
+#include "VectorOpsComplex.h"
 
 #include <cassert>
 
-#ifdef __MSVC__
-#include <malloc.h>
-#define alloca _alloca
-#endif
-
 #if defined USE_POMMIER_MATHFUN
 #if defined __ARMEL__
 #include "pommier/neon_mathfun.h"
@@ -50,10 +45,18 @@
 #endif
 #endif
 
-#ifdef __MSVC__
+#if defined(_MSC_VER) || defined(WIN32)
 #include <malloc.h>
+#ifndef alloca
 #define alloca _alloca
 #endif
+#else
+#include <alloca.h>
+#endif
+
+#include <iostream>
+
+using namespace std;
 
 namespace breakfastquay {
 
@@ -86,6 +89,8 @@
             if (imag < 0.f) atan -= pi;
         }
     }
+
+    return atan;
 }
 #endif
 
@@ -216,8 +221,6 @@
 
 #ifndef NO_COMPLEX_TYPES
 
-#if defined HAVE_IPP
-
 void
 v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
 		     const bq_complex_element_t *const BQ_R__ mag,
@@ -225,58 +228,52 @@
 		     const int count)
 {
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
-	ippsPolarToCart_32fc((const float *)mag, (const float *)phase,
-                             (Ipp32fc *)dst, count);
+        v_polar_to_cartesian_interleaved((float *)dst,
+                                         (const float *)mag,
+                                         (const float *)phase,
+                                         count);
     } else {
-	ippsPolarToCart_64fc((const double *)mag, (const double *)phase,
-                             (Ipp64fc *)dst, count);
+        v_polar_to_cartesian_interleaved((double *)dst,
+                                         (const double *)mag,
+                                         (const double *)phase,
+                                         count);
     }
 }
 
-#elif defined HAVE_VDSP
+void
+v_cartesian_to_polar(bq_complex_element_t *const BQ_R__ mag,
+                     bq_complex_element_t *const BQ_R__ phase,
+                     const bq_complex_t *const BQ_R__ src,
+                     const int count)
+{
+    if (sizeof(bq_complex_element_t) == sizeof(float)) {
+        v_cartesian_interleaved_to_polar((float *)mag,
+                                         (float *)phase,
+                                         (const float *)src,
+                                         count);
+    } else {
+        v_cartesian_interleaved_to_polar((double *)mag,
+                                         (double *)phase,
+                                         (const double *)src,
+                                         count);
+    }
+}
 
 void
-v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
-		     const bq_complex_element_t *const BQ_R__ mag,
-		     const bq_complex_element_t *const BQ_R__ phase,
-		     const int count)
+v_cartesian_to_magnitudes(bq_complex_element_t *const BQ_R__ mag,
+                          const bq_complex_t *const BQ_R__ src,
+                          const int count)
 {
-    bq_complex_element_t *sc = (bq_complex_element_t *)
-	alloca(count * 2 * sizeof(bq_complex_element_t));
-
     if (sizeof(bq_complex_element_t) == sizeof(float)) {
-        vvsincosf((float *)sc, (float *)(sc + count), (float *)phase, &count);
+        v_cartesian_interleaved_to_magnitudes((float *)mag,
+                                              (const float *)src,
+                                              count);
     } else {
-        vvsincos((double *)sc, (double *)(sc + count), (double *)phase, &count);
+        v_cartesian_interleaved_to_magnitudes((double *)mag,
+                                              (const double *)src,
+                                              count);
     }
-
-    int sini = 0;
-    int cosi = count;
-
-    for (int i = 0; i < count; ++i) {
-	dst[i].re = mag[i] * sc[cosi++];
-	dst[i].im = mag[i] * sc[sini++];
-    }
-}    
-
-#else
-
-void
-v_polar_to_cartesian(bq_complex_t *const BQ_R__ dst,
-		     const bq_complex_element_t *const BQ_R__ mag,
-		     const bq_complex_element_t *const BQ_R__ phase,
-		     const int count)
-{
-    for (int i = 0; i < count; ++i) {
-	dst[i] = c_phasor(phase[i]);
-    }
-    for (int i = 0; i < count; ++i) {
-        dst[i].re *= mag[i];
-        dst[i].im *= mag[i];
-    }
-}    
-
-#endif
+}
 
 #if defined USE_POMMIER_MATHFUN
 
@@ -356,7 +353,6 @@
 v_polar_interleaved_to_cartesian_inplace(bq_complex_element_t *const BQ_R__ srcdst,
                                          const int count)
 {
-    // Not ideal
     bq_complex_element_t mag, phase;
     int ii = 0, io = 0;
     for (int i = 0; i < count; ++i) {
@@ -368,6 +364,6 @@
     }
 }
 
-#endif
+#endif // NO_COMPLEX_TYPES
 
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/test/TestAllocators.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,85 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#include "bqvec/VectorOps.h"
+#include "bqvec/Allocators.h"
+
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MAIN
+
+#include <boost/test/unit_test.hpp>
+
+#include <stdexcept>
+#include <vector>
+
+using namespace breakfastquay;
+
+BOOST_AUTO_TEST_SUITE(TestAllocators)
+
+#define COMPARE_ARRAY(a, b)						\
+    for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14);			\
+    }
+
+#define COMPARE_N(a, b, n)						\
+    for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14);			\
+    }
+
+BOOST_AUTO_TEST_CASE(alloc_dealloc)
+{
+    double *v = allocate<double>(4);
+    v[0] = 0.1;
+    v[1] = 2.0;
+    v[2] = -0.3;
+    v[3] = 4.0;
+    double *e = allocate<double>(4);
+    e[0] = -0.3;
+    e[1] = 4.0;
+    e[2] = 0.1;
+    e[3] = 2.0;
+    v_fftshift(v, 4);
+    COMPARE_N(v, e, 4);
+    deallocate(v);
+    deallocate(e);
+}
+
+BOOST_AUTO_TEST_CASE(alloc_zero)
+{
+    double *v = allocate_and_zero<double>(4);
+    BOOST_CHECK_EQUAL(v[0], 0.f);
+    BOOST_CHECK_EQUAL(v[1], 0.f);
+    BOOST_CHECK_EQUAL(v[2], 0.f);
+    BOOST_CHECK_EQUAL(v[3], 0.f);
+    deallocate(v);
+}
+
+BOOST_AUTO_TEST_CASE(alloc_dealloc_channels)
+{
+    double **v = allocate_channels<double>(2, 4);
+    v[0][0] = 0.1;
+    v[0][1] = 2.0;
+    v[0][2] = -0.3;
+    v[0][3] = 4.0;
+    v[1][0] = -0.3;
+    v[1][1] = 4.0;
+    v[1][2] = 0.1;
+    v[1][3] = 2.0;
+    v_fftshift(v[0], 4);
+    COMPARE_N(v[0], v[1], 4);
+    deallocate_channels(v, 2);
+}
+
+BOOST_AUTO_TEST_CASE(stl)
+{
+    std::vector<double, StlAllocator<double> > v;
+    v.push_back(0.1);
+    v.push_back(2.0);
+    v.push_back(-0.3);
+    v.push_back(4.0);
+    double e[] = { -0.3, 4.0, 0.1, 2.0 };
+    v_fftshift(v.data(), 4);
+    COMPARE_N(v.data(), e, 4);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
--- a/bqvec/test/TestVectorOps.cpp	Sat Nov 12 09:59:34 2016 +0000
+++ b/bqvec/test/TestVectorOps.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -1,254 +1,487 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
 
-#include "VectorOpsComplex.h"
+#include "bqvec/VectorOps.h"
 
-#include <iostream>
-#include <cstdlib>
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MAIN
 
-#include <time.h>
+#include <boost/test/unit_test.hpp>
 
-using namespace std;
+#include <stdexcept>
+#include <vector>
 
-namespace breakfastquay {
+using namespace breakfastquay;
 
-namespace Test {
+BOOST_AUTO_TEST_SUITE(TestVectorOps)
 
-#ifdef _WIN32
-#define drand48() (-1+2*((float)rand())/RAND_MAX)
-#endif
-
-bool
-testMultiply()
-{
-    cerr << "testVectorOps: testing v_multiply complex" << endl;
-
-    const int N = 1024;
-    turbot_complex_sample_t target[N];
-    turbot_complex_sample_t src1[N];
-    turbot_complex_sample_t src2[N];
-
-    for (int i = 0; i < N; ++i) {
-	src1[i].re = drand48();
-	src1[i].im = drand48();
-	src2[i].re = drand48();
-	src2[i].im = drand48();
+#define COMPARE_ARRAY(a, b)						\
+    for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14);			\
     }
 
-    turbot_sample_t mean, first, last, total = 0;
-    for (int i = 0; i < N; ++i) {
-        turbot_complex_sample_t result;
-        c_multiply(result, src1[i], src2[i]);
-	if (i == 0) first = result.re;
-	if (i == N-1) last = result.im;
-	total += result.re;
-	total += result.im;
+#define COMPARE_N(a, b, n)						\
+    for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], 1e-14);			\
     }
-    mean = total / (N*2);
-    cerr << "Naive method: mean = " << mean << ", first = " << first
-	 << ", last = " << last << endl;
 
-    v_multiply(target, src1, src2, N);
-    total = 0;
-
-    for (int i = 0; i < N; ++i) {
-	if (i == 0) first = target[i].re;
-	if (i == N-1) last = target[i].im;
-	total += target[i].re;
-	total += target[i].im;
-    }
-    mean = total / (N*2);
-    cerr << "v_multiply: mean = " << mean << ", first = " << first
-	 << ", last = " << last << endl;
-
-    int iterations = 50000;
-    cerr << "Iterations: " << iterations << endl;
-	
-    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
-    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
-
-    clock_t start = clock();
-
-    for (int j = 0; j < iterations; ++j) {
-	for (int i = 0; i < N; ++i) {
-            c_multiply(target[i], src1[i], src2[i]);
-	}
-    }
-    
-    clock_t end = clock();
-
-    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
-
-    start = clock();
-
-    for (int j = 0; j < iterations; ++j) {
-        v_multiply(target, src1, src2, N);
-    }
-    
-    end = clock();
-
-    cerr << "Time for v_multiply: " << float(end - start)/divisor << endl;
-
-    return true;
+BOOST_AUTO_TEST_CASE(add)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double expected[] = { 0.0, 5.0, -1.5 };
+    v_add(a, b, 3);
+    COMPARE_N(a, expected, 3);
 }
 
-bool
-testPolarToCart()
+BOOST_AUTO_TEST_CASE(add_with_gain)
 {
-    cerr << "testVectorOps: testing v_polar_to_cartesian" << endl;
-
-    const int N = 1024;
-    turbot_complex_sample_t target[N];
-    turbot_sample_t mag[N];
-    turbot_sample_t phase[N];
-
-    for (int i = 0; i < N; ++i) {
-	mag[i] = drand48();
-	phase[i] = (drand48() * M_PI * 2) - M_PI;
-    }
-
-    turbot_sample_t mean, first, last, total = 0;
-    for (int i = 0; i < N; ++i) {
-	turbot_sample_t real = mag[i] * cos(phase[i]);
-	turbot_sample_t imag = mag[i] * sin(phase[i]);
-	if (i == 0) first = real;
-	if (i == N-1) last = imag;
-	total += real;
-	total += imag;
-    }
-    mean = total / (N*2);
-    cerr << "Naive method: mean = " << mean << ", first = " << first
-	 << ", last = " << last << endl;
-
-    v_polar_to_cartesian(target, mag, phase, N);
-
-    total = 0;
-
-    for (int i = 0; i < N; ++i) {
-	if (i == 0) first = target[i].re;
-	if (i == N-1) last = target[i].im;
-	total += target[i].re;
-	total += target[i].im;
-    }
-    mean = total / (N*2);
-    cerr << "v_polar_to_cartesian: mean = " << mean << ", first = " << first
-	 << ", last = " << last << endl;
-
-    int iterations = 10000;
-    cerr << "Iterations: " << iterations << endl;
-	
-    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
-    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
-
-    clock_t start = clock();
-
-    for (int j = 0; j < iterations; ++j) {
-	for (int i = 0; i < N; ++i) {
-	    target[i].re = mag[i] * cos(phase[i]);
-	    target[i].im = mag[i] * sin(phase[i]);
-	}
-    }
-    
-    clock_t end = clock();
-
-    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
-
-    start = clock();
-
-    for (int j = 0; j < iterations; ++j) {
-	v_polar_to_cartesian(target, mag, phase, N);
-    }
-    
-    end = clock();
-
-    cerr << "Time for v_polar_to_cartesian: " << float(end - start)/divisor << endl;
-
-    return true;
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double expected[] = { -0.5, 6.5, -3.75 };
+    v_add_with_gain(a, b, 1.5, 3);
+    COMPARE_N(a, expected, 3);
 }
 
-bool
-testPolarToCartInterleaved()
+BOOST_AUTO_TEST_CASE(subtract)
 {
-    cerr << "testVectorOps: testing v_polar_interleaved_to_cartesian" << endl;
-
-    const int N = 1024;
-    turbot_complex_sample_t target[N];
-    turbot_sample_t source[N*2];
-
-    for (int i = 0; i < N; ++i) {
-	source[i*2] = drand48();
-	source[i*2+1] = (drand48() * M_PI * 2) - M_PI;
-    }
-
-    turbot_sample_t mean, first, last, total = 0;
-    for (int i = 0; i < N; ++i) {
-	turbot_sample_t real = source[i*2] * cos(source[i*2+1]);
-	turbot_sample_t imag = source[i*2] * sin(source[i*2+1]);
-	if (i == 0) first = real;
-	if (i == N-1) last = imag;
-	total += real;
-	total += imag;
-    }
-    mean = total / (N*2);
-    cerr << "Naive method: mean = " << mean << ", first = " << first
-	 << ", last = " << last << endl;
-
-    v_polar_interleaved_to_cartesian(target, source, N);
-
-    total = 0;
-
-    for (int i = 0; i < N; ++i) {
-	if (i == 0) first = target[i].re;
-	if (i == N-1) last = target[i].im;
-	total += target[i].re;
-	total += target[i].im;
-    }
-    mean = total / (N*2);
-    cerr << "v_polar_interleaved_to_cartesian: mean = " << mean << ", first = " << first
-	 << ", last = " << last << endl;
-
-    int iterations = 10000;
-    cerr << "Iterations: " << iterations << endl;
-	
-    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
-    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
-
-    clock_t start = clock();
-
-    for (int j = 0; j < iterations; ++j) {
-	for (int i = 0; i < N; ++i) {
-	    target[i].re = source[i*2] * cos(source[i*2+1]);
-	    target[i].im = source[i*2] * sin(source[i*2+1]);
-	}
-    }
-    
-    clock_t end = clock();
-
-    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
-
-    start = clock();
-
-    for (int j = 0; j < iterations; ++j) {
-	v_polar_interleaved_to_cartesian(target, source, N);
-    }
-    
-    end = clock();
-
-    cerr << "Time for v_polar_interleaved_to_cartesian: " << float(end - start)/divisor << endl;
-
-    return true;
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double expected[] = { 2.0, -1.0, 7.5 };
+    v_subtract(a, b, 3);
+    COMPARE_N(a, expected, 3);
 }
 
-bool
-testVectorOps()
+BOOST_AUTO_TEST_CASE(increment)
 {
-    if (!testMultiply()) return false;
-    if (!testPolarToCart()) return false;
-    if (!testPolarToCartInterleaved()) return false;
-    
-    return true;
+    double a[] = { -1.0, 3.0, -4.5 };
+    double incr = -0.5;
+    double expected[] = { -1.5, 2.5, -5.0 };
+    v_increment(a, incr, 3);
+    COMPARE_N(a, expected, 3);
 }
 
+BOOST_AUTO_TEST_CASE(scale)
+{
+    double a[] = { -1.0, 3.0, -4.5 };
+    double scale = -0.5;
+    double expected[] = { 0.5, -1.5, 2.25 };
+    v_scale(a, scale, 3);
+    COMPARE_N(a, expected, 3);
 }
 
+BOOST_AUTO_TEST_CASE(multiply)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double expected[] = { -1.0, 6.0, -13.5 };
+    v_multiply(a, b, 3);
+    COMPARE_N(a, expected, 3);
 }
 
+BOOST_AUTO_TEST_CASE(multiply_to)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double o[3];
+    double expected[] = { -1.0, 6.0, -13.5 };
+    v_multiply_to(o, a, b, 3);
+    COMPARE_N(o, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(multiply_and_add)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double c[] = { 3.0, -1.0, 4.0 };
+    double expected[] = { 2.0, 5.0, -9.5 };
+    v_multiply_and_add(c, a, b, 3);
+    COMPARE_N(c, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(divide)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { -1.0, 3.0, -4.5 };
+    double expected[] = { -1.0, 2.0/3.0, 3.0/-4.5 };
+    v_divide(a, b, 3);
+    COMPARE_N(a, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(sum)
+{
+    double a[] = { 1.0, 2.0, -3.5 };
+    double s = v_sum(a, 3);
+    BOOST_CHECK_EQUAL(s, -0.5);
+}
+
+BOOST_AUTO_TEST_CASE(multiply_and_sum)
+{
+    double a[] = { 2.0, 0.0, -1.5 };
+    double b[] = { 3.0, 4.0, 5.0 };
+    double s = v_multiply_and_sum(a, b, 3);
+    BOOST_CHECK_EQUAL(s, -1.5);
+}
+
+BOOST_AUTO_TEST_CASE(log)
+{
+    double a[] = { 1.0, 1.0 / M_E, M_E };
+    double expected[] = { 0.0, -1.0, 1.0 };
+    v_log(a, 3);
+    COMPARE_N(a, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(exp)
+{
+    double a[] = { 0.0, -1.0, 2.0 };
+    double expected[] = { 1.0, 1.0 / M_E, M_E * M_E };
+    v_exp(a, 3);
+    COMPARE_N(a, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(sqrt)
+{
+    double a[] = { 0.0, 1.0, 4.0 };
+    double expected[] = { 0.0, 1.0, 2.0 };
+    v_sqrt(a, 3);
+    COMPARE_N(a, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(square)
+{
+    double a[] = { 0.0, 1.5, -2.0 };
+    double expected[] = { 0.0, 2.25, 4.0 };
+    v_square(a, 3);
+    COMPARE_N(a, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(abs)
+{
+    double a[] = { -1.9, 0.0, 0.01, -0.0 };
+    double expected[] = { 1.9, 0.0, 0.01, 0.0 };
+    v_abs(a, 4);
+    COMPARE_N(a, expected, 4);
+}
+
+BOOST_AUTO_TEST_CASE(mean)
+{
+    double a[] = { -1.0, 1.6, 3.0 };
+    double s = v_mean(a, 3);
+    BOOST_CHECK_SMALL(s - 1.2, 1e-14);
+}
+
+BOOST_AUTO_TEST_CASE(interleave_1)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double *ch[] = { a };
+    double o[3];
+    double expected[] = { 1.0, 2.0, 3.0 };
+    v_interleave(o, ch, 1, 3);
+    COMPARE_N(o, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(interleave_2)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { 4.0, 5.0, 6.0 };
+    double *ch[] = { a, b };
+    double o[6];
+    double expected[] = { 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 };
+    v_interleave(o, ch, 2, 3);
+    COMPARE_N(o, expected, 6);
+}
+
+BOOST_AUTO_TEST_CASE(interleave_3)
+{
+    double a[] = { 1.0, 2.0 };
+    double b[] = { 3.0, 4.0 };
+    double c[] = { 5.0, 6.0 };
+    double *ch[] = { a, b, c };
+    double o[6];
+    double expected[] = { 1.0, 3.0, 5.0, 2.0, 4.0, 6.0 };
+    v_interleave(o, ch, 3, 2);
+    COMPARE_N(o, expected, 6);
+}
+
+BOOST_AUTO_TEST_CASE(deinterleave_1)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double o[3];
+    double *oo[] = { o };
+    double *expected[] = { a };
+    v_deinterleave(oo, a, 1, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+}
+
+BOOST_AUTO_TEST_CASE(deinterleave_2)
+{
+    double a[] = { 1.0, 4.0, 2.0, 5.0, 3.0, 6.0 };
+    double o1[3], o2[3];
+    double *oo[] = { o1, o2 };
+    double e1[] = { 1.0, 2.0, 3.0 }, e2[] = { 4.0, 5.0, 6.0 };
+    double *expected[] = { e1, e2 };
+    v_deinterleave(oo, a, 2, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+    COMPARE_N(oo[1], expected[1], 3);
+}
+
+BOOST_AUTO_TEST_CASE(deinterleave_3)
+{
+    double a[] = { 1.0, 3.0, 5.0, 2.0, 4.0, 6.0 };
+    double o1[2], o2[2], o3[2];
+    double *oo[] = { o1, o2, o3 };
+    double e1[] = { 1.0, 2.0 }, e2[] = { 3.0, 4.0 }, e3[] = { 5.0, 6.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_deinterleave(oo, a, 3, 2);
+    COMPARE_N(oo[0], expected[0], 2);
+    COMPARE_N(oo[1], expected[1], 2);
+    COMPARE_N(oo[2], expected[2], 2);
+}
+
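+// Note that v_mix averages its input channels rather than summing them, as
+// the two- and three-channel expectations below show.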
+BOOST_AUTO_TEST_CASE(mix_1)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double *ch[] = { a };
+    double o[3];
+    double expected[] = { 1.0, 2.0, 3.0 };
+    v_mix(o, ch, 1, 3);
+    COMPARE_N(o, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(mix_2)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double b[] = { 4.0, 5.0, 6.0 };
+    double *ch[] = { a, b };
+    double o[3];
+    double expected[] = { 2.5, 3.5, 4.5 };
+    v_mix(o, ch, 2, 3);
+    COMPARE_N(o, expected, 3);
+}
+
+BOOST_AUTO_TEST_CASE(mix_3)
+{
+    double a[] = { 1.0, 2.0 };
+    double b[] = { 3.0, 4.0 };
+    double c[] = { 5.0, 6.0 };
+    double *ch[] = { a, b, c };
+    double o[2];
+    double expected[] = { 3.0, 4.0 };
+    v_mix(o, ch, 3, 2);
+    COMPARE_N(o, expected, 2);
+}
+
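+// v_reconfigure_channels maps n input channels onto m output channels: one
+// input is duplicated to every output, several inputs collapse to one by
+// averaging, and the remaining mappings distribute the inputs across the
+// outputs as the expectations below illustrate.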
+BOOST_AUTO_TEST_CASE(reconfigure_1_2)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double *aa[] = { a };
+    double o1[3], o2[3];
+    double *oo[] = { o1, o2 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 1.0, 2.0, 3.0 };
+    double *expected[] = { e1, e2 };
+    v_reconfigure_channels(oo, 2, aa, 1, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+    COMPARE_N(oo[1], expected[1], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_2_1)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double *aa[] = { a1, a2 };
+    double o1[3];
+    double *oo[] = { o1 };
+    double e1[] = { 2.5, 3.5, 4.5 };
+    double *expected[] = { e1 };
+    v_reconfigure_channels(oo, 1, aa, 2, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_3_1)
+{
+    double a1[] = { 1.0, 2.0 };
+    double a2[] = { 3.0, 4.0 };
+    double a3[] = { 5.0, 6.0 };
+    double *aa[] = { a1, a2, a3 };
+    double o1[2];
+    double *oo[] = { o1 };
+    double e1[] = { 3.0, 4.0 };
+    double *expected[] = { e1 };
+    v_reconfigure_channels(oo, 1, aa, 3, 2);
+    COMPARE_N(oo[0], expected[0], 2);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_1_3)
+{
+    double a[] = { 1.0, 2.0, 3.0 };
+    double *aa[] = { a };
+    double o1[3], o2[3], o3[3];
+    double *oo[] = { o1, o2, o3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 1.0, 2.0, 3.0 };
+    double e3[] = { 1.0, 2.0, 3.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_reconfigure_channels(oo, 3, aa, 1, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+    COMPARE_N(oo[1], expected[1], 3);
+    COMPARE_N(oo[2], expected[2], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_2_3)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double *aa[] = { a1, a2 };
+    double o1[3], o2[3], o3[3];
+    double *oo[] = { o1, o2, o3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 0.0, 0.0, 0.0 };
+    double e3[] = { 4.0, 5.0, 6.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_reconfigure_channels(oo, 3, aa, 2, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+    COMPARE_N(oo[1], expected[1], 3);
+    COMPARE_N(oo[2], expected[2], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_3_2)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double a3[] = { 7.0, 8.0, 9.0 };
+    double *aa[] = { a1, a2, a3 };
+    double o1[3], o2[3];
+    double *oo[] = { o1, o2 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 7.0, 8.0, 9.0 };
+    double *expected[] = { e1, e2 };
+    v_reconfigure_channels(oo, 2, aa, 3, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+    COMPARE_N(oo[1], expected[1], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_3_3)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double a3[] = { 7.0, 8.0, 9.0 };
+    double *aa[] = { a1, a2, a3 };
+    double o1[3], o2[3], o3[3];
+    double *oo[] = { o1, o2, o3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 4.0, 5.0, 6.0 };
+    double e3[] = { 7.0, 8.0, 9.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_reconfigure_channels(oo, 3, aa, 3, 3);
+    COMPARE_N(oo[0], expected[0], 3);
+    COMPARE_N(oo[1], expected[1], 3);
+    COMPARE_N(oo[2], expected[2], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_1_2_inplace)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[3];
+    double *aa[] = { a1, a2 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 1.0, 2.0, 3.0 };
+    double *expected[] = { e1, e2 };
+    v_reconfigure_channels_inplace(aa, 2, 1, 3);
+    COMPARE_N(aa[0], expected[0], 3);
+    COMPARE_N(aa[1], expected[1], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_2_1_inplace)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double *aa[] = { a1, a2 };
+    double e1[] = { 2.5, 3.5, 4.5 };
+    double *expected[] = { e1 };
+    v_reconfigure_channels_inplace(aa, 1, 2, 3);
+    COMPARE_N(aa[0], expected[0], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_3_1_inplace)
+{
+    double a1[] = { 1.0, 2.0 };
+    double a2[] = { 3.0, 4.0 };
+    double a3[] = { 5.0, 6.0 };
+    double *aa[] = { a1, a2, a3 };
+    double e1[] = { 3.0, 4.0 };
+    double *expected[] = { e1 };
+    v_reconfigure_channels_inplace(aa, 1, 3, 2);
+    COMPARE_N(aa[0], expected[0], 2);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_1_3_inplace)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[3], a3[3];
+    double *aa[] = { a1, a2, a3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 1.0, 2.0, 3.0 };
+    double e3[] = { 1.0, 2.0, 3.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_reconfigure_channels_inplace(aa, 3, 1, 3);
+    COMPARE_N(aa[0], expected[0], 3);
+    COMPARE_N(aa[1], expected[1], 3);
+    COMPARE_N(aa[2], expected[2], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_2_3_inplace)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double a3[3];
+    double *aa[] = { a1, a2, a3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 4.0, 5.0, 6.0 };
+    double e3[] = { 0.0, 0.0, 0.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_reconfigure_channels_inplace(aa, 3, 2, 3);
+    COMPARE_N(aa[0], expected[0], 3);
+    COMPARE_N(aa[1], expected[1], 3);
+    COMPARE_N(aa[2], expected[2], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_3_2_inplace)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double a3[] = { 7.0, 8.0, 9.0 };
+    double *aa[] = { a1, a2, a3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 4.0, 5.0, 6.0 };
+    double *expected[] = { e1, e2 };
+    v_reconfigure_channels_inplace(aa, 2, 3, 3);
+    COMPARE_N(aa[0], expected[0], 3);
+    COMPARE_N(aa[1], expected[1], 3);
+}
+
+BOOST_AUTO_TEST_CASE(reconfigure_3_3_inplace)
+{
+    double a1[] = { 1.0, 2.0, 3.0 };
+    double a2[] = { 4.0, 5.0, 6.0 };
+    double a3[] = { 7.0, 8.0, 9.0 };
+    double *aa[] = { a1, a2, a3 };
+    double e1[] = { 1.0, 2.0, 3.0 };
+    double e2[] = { 4.0, 5.0, 6.0 };
+    double e3[] = { 7.0, 8.0, 9.0 };
+    double *expected[] = { e1, e2, e3 };
+    v_reconfigure_channels_inplace(aa, 3, 3, 3);
+    COMPARE_N(aa[0], expected[0], 3);
+    COMPARE_N(aa[1], expected[1], 3);
+    COMPARE_N(aa[2], expected[2], 3);
+}
+
+BOOST_AUTO_TEST_CASE(fftshift)
+{
+    double a[] = { 0.1, 2.0, -0.3, 4.0 };
+    double e[] = { -0.3, 4.0, 0.1, 2.0 };
+    v_fftshift(a, 4);
+    COMPARE_N(a, e, 4);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
--- a/bqvec/test/TestVectorOps.h	Sat Nov 12 09:59:34 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
-
-namespace breakfastquay {
-
-namespace Test {
-
-bool testVectorOps();
-
-}
-
-}
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/test/TestVectorOpsComplex.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,213 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#include "bqvec/VectorOpsComplex.h"
+#include "bqvec/VectorOps.h"
+
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MAIN
+
+#include <boost/test/unit_test.hpp>
+
+#include <stdexcept>
+#include <vector>
+#include <iostream>
+#include <cmath>
+
+using namespace breakfastquay;
+
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(TestVectorOpsComplex)
+
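+// Comparison tolerance for the checks below: the approximate atan2 option
+// is only good to a few parts in a thousand, and the single-precision
+// complex type to around float epsilon; otherwise expect something close
+// to double precision.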
+#ifdef USE_APPROXIMATE_ATAN2
+static const double eps = 5.0e-3;
+#else
+#ifdef USE_SINGLE_PRECISION_COMPLEX
+static const double eps = 1.0e-7;
+#else
+static const double eps = 1.0e-14;
+#endif
+#endif
+    
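+// Element-wise closeness checks: COMPARE_N for plain double arrays,
+// COMPARE_NC for bq_complex_element_t arrays, and COMPARE_CPLX_N for the
+// re/im fields of bq_complex_t arrays, each to within eps.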
+#define COMPARE_N(a, b, n) \
+    for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], eps); \
+    }
+
+#define COMPARE_NC(a, b, n) \
+    for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], (bq_complex_element_t) eps); \
+    }
+
+#define COMPARE_CPLX_N(a, b, n)						\
+    for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \
+        BOOST_CHECK_SMALL(a[cmp_i].re - b[cmp_i].re, (bq_complex_element_t) eps); \
+        BOOST_CHECK_SMALL(a[cmp_i].im - b[cmp_i].im, (bq_complex_element_t) eps); \
+    }
+
+BOOST_AUTO_TEST_CASE(add)
+{
+    bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+    bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } };
+    bq_complex_t expected[] = { { 0.0, 5.0 }, { -1.5, -4.0 } };
+    v_add(a, b, 2);
+    COMPARE_CPLX_N(a, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(add_with_gain)
+{
+    bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+    bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } };
+    bq_complex_t expected[] = { { -0.5, 6.5 }, { -3.75, -4.0 } };
+    v_add_with_gain(a, b, (bq_complex_element_t) 1.5, 2);
+    COMPARE_CPLX_N(a, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(multiply)
+{
+    bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+    bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } };
+    bq_complex_t expected[] = { { -7.0, 1.0 }, { -13.5, 18.0 } };
+    v_multiply(a, b, 2);
+    COMPARE_CPLX_N(a, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(multiply_to)
+{
+    bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+    bq_complex_t b[] = { { -1.0, 3.0 }, { -4.5, 0.0 } };
+    bq_complex_t o[2];
+    bq_complex_t expected[] = { { -7.0, 1.0 }, { -13.5, 18.0 } };
+    v_multiply_to(o, a, b, 2);
+    COMPARE_CPLX_N(o, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes_bq)
+{
+    bq_complex_t a[] = { { 1.0, 2.0 }, { 3.0, -4.0 } };
+    bq_complex_element_t o[2];
+    bq_complex_element_t expected[] = { sqrt(5.0), 5.0 };
+    v_cartesian_to_magnitudes(o, a, 2);
+    COMPARE_NC(o, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_to_magnitudes)
+{
+    double re[] = { 1.0, 3.0 };
+    double im[] = { 2.0, -4.0 };
+    double o[2];
+    double expected[] = { sqrt(5.0), 5.0 };
+    v_cartesian_to_magnitudes(o, re, im, 2);
+    COMPARE_N(o, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_interleaved_to_magnitudes)
+{
+    double a[] = { 1.0, 2.0, 3.0, -4.0 };
+    double o[2];
+    double expected[] = { sqrt(5.0), 5.0 };
+    v_cartesian_interleaved_to_magnitudes(o, a, 2);
+    COMPARE_N(o, expected, 2);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_to_polar_bq)
+{
+    bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } };
+    bq_complex_element_t mo[3], po[3];
+    bq_complex_element_t me[] = { 0.0, sqrt(2.0), 1.0 };
+    bq_complex_element_t pe[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 };
+    v_cartesian_to_polar(mo, po, a, 3);
+    COMPARE_NC(mo, me, 3);
+    COMPARE_NC(po, pe, 3);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_to_polar_interleaved_bq)
+{
+    bq_complex_t a[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } };
+    bq_complex_element_t o[6];
+    bq_complex_element_t e[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 };
+    v_cartesian_to_polar_interleaved(o, a, 3);
+    COMPARE_NC(o, e, 6);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_to_polar)
+{
+    double re[] = { 0.0, 1.0, 0.0 };
+    double im[] = { 0.0, 1.0, -1.0 };
+    double mo[3], po[3];
+    double me[] = { 0.0, sqrt(2.0), 1.0 };
+    double pe[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 };
+    v_cartesian_to_polar(mo, po, re, im, 3);
+    COMPARE_N(mo, me, 3);
+    COMPARE_N(po, pe, 3);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_to_polar_interleaved_inplace)
+{
+    double a[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 };
+    double e[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 };
+    v_cartesian_to_polar_interleaved_inplace(a, 3);
+    COMPARE_N(a, e, 6);
+}
+
+BOOST_AUTO_TEST_CASE(cartesian_interleaved_to_polar)
+{
+    double a[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 };
+    double mo[3], po[3];
+    double me[] = { 0.0, sqrt(2.0), 1.0 };
+    double pe[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 };
+    v_cartesian_interleaved_to_polar(mo, po, a, 3);
+    COMPARE_N(mo, me, 3);
+    COMPARE_N(po, pe, 3);
+}
+
+BOOST_AUTO_TEST_CASE(polar_to_cartesian_bq)
+{
+    bq_complex_element_t m[] = { 0.0, sqrt(2.0), 1.0 };
+    bq_complex_element_t p[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 };
+    bq_complex_t o[3];
+    bq_complex_t e[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } };
+    v_polar_to_cartesian(o, m, p, 3);
+    COMPARE_CPLX_N(o, e, 3);
+}
+
+BOOST_AUTO_TEST_CASE(polar_to_cartesian_interleaved_bq)
+{
+    bq_complex_element_t a[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 };
+    bq_complex_t o[3];
+    bq_complex_t e[] = { { 0.0, 0.0 }, { 1.0, 1.0 }, { 0.0, -1.0 } };
+    v_polar_interleaved_to_cartesian(o, a, 3);
+    COMPARE_CPLX_N(o, e, 3);
+}
+
+BOOST_AUTO_TEST_CASE(polar_to_cartesian)
+{
+    double m[] = { 0.0, sqrt(2.0), 1.0 };
+    double p[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 };
+    double ro[3], io[3];
+    double re[] = { 0.0, 1.0, 0.0 };
+    double ie[] = { 0.0, 1.0, -1.0 };
+    v_polar_to_cartesian(ro, io, m, p, 3);
+    COMPARE_N(ro, re, 3);
+    COMPARE_N(io, ie, 3);
+}
+
+BOOST_AUTO_TEST_CASE(polar_to_cartesian_interleaved_inplace)
+{
+    double a[] = { 0.0, 0.0, sqrt(2.0), M_PI / 4.0, 1.0, -M_PI * 0.5 };
+    double e[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 };
+    v_polar_interleaved_to_cartesian_inplace(a, 3);
+    COMPARE_N(a, e, 6);
+}
+
+BOOST_AUTO_TEST_CASE(polar_to_cartesian_interleaved)
+{
+    double m[] = { 0.0, sqrt(2.0), 1.0 };
+    double p[] = { 0.0, M_PI / 4.0, -M_PI * 0.5 };
+    double o[6];
+    double e[] = { 0.0, 0.0, 1.0, 1.0, 0.0, -1.0 };
+    v_polar_to_cartesian_interleaved(o, m, p, 3);
+    COMPARE_N(o, e, 6);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bqvec/test/Timings.cpp	Tue Nov 19 10:13:32 2019 +0000
@@ -0,0 +1,323 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#include "bqvec/VectorOpsComplex.h"
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+
+#include <time.h>
+
+using namespace std;
+using namespace breakfastquay;
+
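+// Informal timing comparisons of naive loops against the v_* vector ops.
+// Results go to stderr; this is a benchmark harness, not a correctness test.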
+//!!! This is nonsense. TODO: Replace it with sense.
+
+#ifdef _WIN32
+// drand48() is unavailable on Windows: substitute a rand()-based stand-in
+// that, like the real thing, returns values in [0, 1)
+#define drand48() (((float)rand())/RAND_MAX)
+#endif
+
+bool
+testMultiply()
+{
+    cerr << "testVectorOps: testing v_multiply complex" << endl;
+
+    const int N = 1024;
+    //!!! todo: use aligned allocate(), otherwise results will vary randomly
+    bq_complex_t target[N];
+    bq_complex_t src1[N];
+    bq_complex_t src2[N];
+
+    for (int i = 0; i < N; ++i) {
+	src1[i].re = drand48();
+	src1[i].im = drand48();
+	src2[i].re = drand48();
+	src2[i].im = drand48();
+    }
+
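+    // Print mean/first/last for each method so the results can be eyeballed
+    // for agreement, and so the compiler cannot optimise the work away.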
+    double mean = 0, first = 0, last = 0, total = 0;
+    for (int i = 0; i < N; ++i) {
+        bq_complex_t result;
+        c_multiply(result, src1[i], src2[i]);
+	if (i == 0) first = result.re;
+	if (i == N-1) last = result.im;
+	total += result.re;
+	total += result.im;
+    }
+    mean = total / (N*2);
+    cerr << "Naive method: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    v_multiply_to(target, src1, src2, N);
+    total = 0;
+
+    for (int i = 0; i < N; ++i) {
+	if (i == 0) first = target[i].re;
+	if (i == N-1) last = target[i].im;
+	total += target[i].re;
+	total += target[i].im;
+    }
+    mean = total / (N*2);
+    cerr << "v_multiply: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
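+    // Crude timing: run each variant repeatedly and report elapsed CPU time
+    // in milliseconds using clock().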
+    int iterations = 50000;
+//    cerr << "Iterations: " << iterations << endl;
+	
+//    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
+    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
+
+    clock_t start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+	for (int i = 0; i < N; ++i) {
+            c_multiply(target[i], src1[i], src2[i]);
+	}
+    }
+    
+    clock_t end = clock();
+
+    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
+
+    start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+        v_multiply_to(target, src1, src2, N);
+    }
+    
+    end = clock();
+
+    cerr << "Time for v_multiply: " << float(end - start)/divisor << endl;
+
+    return true;
+}
+
+bool
+testPolarToCart()
+{
+    cerr << "testVectorOps: testing v_polar_to_cartesian" << endl;
+
+    const int N = 1024;
+    bq_complex_t target[N];
+    bq_complex_element_t mag[N];
+    bq_complex_element_t phase[N];
+
+    for (int i = 0; i < N; ++i) {
+	mag[i] = drand48();
+	phase[i] = (drand48() * M_PI * 2) - M_PI;
+    }
+
+    double mean = 0, first = 0, last = 0, total = 0;
+    for (int i = 0; i < N; ++i) {
+	double real = mag[i] * cos(phase[i]);
+	double imag = mag[i] * sin(phase[i]);
+	if (i == 0) first = real;
+	if (i == N-1) last = imag;
+	total += real;
+	total += imag;
+    }
+    mean = total / (N*2);
+    cerr << "Naive method: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    v_polar_to_cartesian(target, mag, phase, N);
+
+    total = 0;
+
+    for (int i = 0; i < N; ++i) {
+	if (i == 0) first = target[i].re;
+	if (i == N-1) last = target[i].im;
+	total += target[i].re;
+	total += target[i].im;
+    }
+    mean = total / (N*2);
+    cerr << "v_polar_to_cartesian: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    int iterations = 10000;
+//    cerr << "Iterations: " << iterations << endl;
+	
+//    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
+    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
+
+    clock_t start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+	for (int i = 0; i < N; ++i) {
+	    target[i].re = mag[i] * cos(phase[i]);
+	    target[i].im = mag[i] * sin(phase[i]);
+	}
+    }
+    
+    clock_t end = clock();
+
+    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
+
+    start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+	v_polar_to_cartesian(target, mag, phase, N);
+    }
+    
+    end = clock();
+
+    cerr << "Time for v_polar_to_cartesian: " << float(end - start)/divisor << endl;
+
+    return true;
+}
+
+bool
+testPolarToCartInterleaved()
+{
+    cerr << "testVectorOps: testing v_polar_interleaved_to_cartesian" << endl;
+
+    const int N = 1024;
+    bq_complex_t target[N];
+    bq_complex_element_t source[N*2];
+
+    for (int i = 0; i < N; ++i) {
+	source[i*2] = drand48();
+	source[i*2+1] = (drand48() * M_PI * 2) - M_PI;
+    }
+
+    double mean = 0, first = 0, last = 0, total = 0;
+    for (int i = 0; i < N; ++i) {
+	double real = source[i*2] * cos(source[i*2+1]);
+	double imag = source[i*2] * sin(source[i*2+1]);
+	if (i == 0) first = real;
+	if (i == N-1) last = imag;
+	total += real;
+	total += imag;
+    }
+    mean = total / (N*2);
+    cerr << "Naive method: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    v_polar_interleaved_to_cartesian(target, source, N);
+
+    total = 0;
+
+    for (int i = 0; i < N; ++i) {
+	if (i == 0) first = target[i].re;
+	if (i == N-1) last = target[i].im;
+	total += target[i].re;
+	total += target[i].im;
+    }
+    mean = total / (N*2);
+    cerr << "v_polar_interleaved_to_cartesian: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    int iterations = 10000;
+//    cerr << "Iterations: " << iterations << endl;
+	
+//    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
+    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
+
+    clock_t start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+	for (int i = 0; i < N; ++i) {
+	    target[i].re = source[i*2] * cos(source[i*2+1]);
+	    target[i].im = source[i*2] * sin(source[i*2+1]);
+	}
+    }
+    
+    clock_t end = clock();
+
+    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
+
+    start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+	v_polar_interleaved_to_cartesian(target, source, N);
+    }
+    
+    end = clock();
+
+    cerr << "Time for v_polar_interleaved_to_cartesian: " << float(end - start)/divisor << endl;
+
+    return true;
+}
+
+bool
+testCartToPolar()
+{
+    cerr << "testVectorOps: testing v_cartesian_to_polar" << endl;
+
+    const int N = 1024;
+    bq_complex_t source[N];
+    bq_complex_element_t mag[N];
+    bq_complex_element_t phase[N];
+
+    for (int i = 0; i < N; ++i) {
+        source[i].re = (drand48() * 2.0) - 1.0;
+        source[i].im = (drand48() * 2.0) - 1.0;
+    }
+
+    double mean = 0, first = 0, last = 0, total = 0;
+    for (int i = 0; i < N; ++i) {
+        // named m and p so as not to shadow the mag and phase arrays above
+        double m = sqrt(source[i].re * source[i].re + source[i].im * source[i].im);
+        double p = atan2(source[i].im, source[i].re);
+        if (i == 0) first = m;
+        if (i == N-1) last = p;
+        total += m;
+        total += p;
+    }
+    mean = total / (N*2);
+    cerr << "Naive method: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    v_cartesian_to_polar(mag, phase, source, N);
+
+    total = 0;
+
+    for (int i = 0; i < N; ++i) {
+	if (i == 0) first = mag[i];
+	if (i == N-1) last = phase[i];
+	total += mag[i];
+	total += phase[i];
+    }
+    mean = total / (N*2);
+    cerr << "v_cartesian_to_polar: mean = " << mean << ", first = " << first
+	 << ", last = " << last << endl;
+
+    int iterations = 10000;
+//    cerr << "Iterations: " << iterations << endl;
+	
+//    cerr << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << endl;
+    float divisor = float(CLOCKS_PER_SEC) / 1000.f;
+
+    clock_t start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+	for (int i = 0; i < N; ++i) {
+            mag[i] = sqrt(source[i].re * source[i].re + source[i].im * source[i].im);
+            phase[i] = atan2(source[i].im, source[i].re);
+	}
+    }
+    
+    clock_t end = clock();
+
+    cerr << "Time for naive method: " << float(end - start)/divisor << endl;
+
+    start = clock();
+
+    for (int j = 0; j < iterations; ++j) {
+        v_cartesian_to_polar(mag, phase, source, N);
+    }
+    
+    end = clock();
+
+    cerr << "Time for v_cartesian_to_polar: " << float(end - start)/divisor << endl;
+
+    return true;
+}
+
+int main(int, char **)
+{
+    if (!testMultiply()) return 1;
+    if (!testPolarToCart()) return 1;
+    if (!testPolarToCartInterleaved()) return 1;
+    if (!testCartToPolar()) return 1;
+    return 0;
+}
+