Mercurial > hg > sv-dependency-builds
view src/rubberband-1.8.1/src/system/VectorOps.h @ 84:08ae793730bd
Add null config files
author | Chris Cannam |
---|---|
date | Mon, 02 Mar 2020 14:03:47 +0000 |
parents | 37bf6b4a2645 |
children |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Rubber Band Library An audio time-stretching and pitch-shifting library. Copyright 2007-2012 Particular Programs Ltd. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. Alternatively, if you have a valid commercial licence for the Rubber Band Library obtained by agreement with the copyright holders, you may redistribute and/or modify it under the terms described in that licence. If you wish to distribute code using the Rubber Band Library under terms other than those of the GNU General Public License, you must obtain a valid commercial licence before doing so. */ #ifndef _RUBBERBAND_VECTOR_OPS_H_ #define _RUBBERBAND_VECTOR_OPS_H_ #ifdef HAVE_IPP #ifndef _MSC_VER #include <inttypes.h> #endif #include <ipps.h> #include <ippac.h> #endif #ifdef HAVE_VDSP #include <vecLib/vDSP.h> #include <vecLib/vForce.h> #endif #include <cstring> #include "sysutils.h" namespace RubberBand { // Note that all functions with a "target" vector have their arguments // in the same order as memcpy and friends, i.e. target vector first. // This is the reverse order from the IPP functions. // The ideal here is to write the basic loops in such a way as to be // auto-vectorizable by a sensible compiler (definitely gcc-4.3 on // Linux, ideally also gcc-4.0 on OS/X). template<typename T> inline void v_zero(T *const R__ ptr, const int count) { const T value = T(0); for (int i = 0; i < count; ++i) { ptr[i] = value; } } #if defined HAVE_IPP template<> inline void v_zero(float *const R__ ptr, const int count) { ippsZero_32f(ptr, count); } template<> inline void v_zero(double *const R__ ptr, const int count) { ippsZero_64f(ptr, count); } #elif defined HAVE_VDSP template<> inline void v_zero(float *const R__ ptr, const int count) { vDSP_vclr(ptr, 1, count); } template<> inline void v_zero(double *const R__ ptr, const int count) { vDSP_vclrD(ptr, 1, count); } #endif template<typename T> inline void v_zero_channels(T *const R__ *const R__ ptr, const int channels, const int count) { for (int c = 0; c < channels; ++c) { v_zero(ptr[c], count); } } template<typename T> inline void v_set(T *const R__ ptr, const T value, const int count) { for (int i = 0; i < count; ++i) { ptr[i] = value; } } template<typename T> inline void v_copy(T *const R__ dst, const T *const R__ src, const int count) { for (int i = 0; i < count; ++i) { dst[i] = src[i]; } } #if defined HAVE_IPP template<> inline void v_copy(float *const R__ dst, const float *const R__ src, const int count) { ippsCopy_32f(src, dst, count); } template<> inline void v_copy(double *const R__ dst, const double *const R__ src, const int count) { ippsCopy_64f(src, dst, count); } #endif template<typename T> inline void v_copy_channels(T *const R__ *const R__ dst, const T *const R__ *const R__ src, const int channels, const int count) { for (int c = 0; c < channels; ++c) { v_copy(dst[c], src[c], count); } } // src and dst alias by definition, so not restricted template<typename T> inline void v_move(T *const dst, const T *const src, const int count) { memmove(dst, src, count * sizeof(T)); } #if defined HAVE_IPP template<> inline void v_move(float *const dst, const float *const src, const int count) { ippsMove_32f(src, dst, count); } template<> inline void v_move(double *const dst, const double *const src, const int count) { ippsMove_64f(src, dst, count); } #endif template<typename T, typename U> inline void v_convert(U *const R__ dst, const T *const R__ src, const int count) { for (int i = 0; i < count; ++i) { dst[i] = U(src[i]); } } template<> inline void v_convert(float *const R__ dst, const float *const R__ src, const int count) { v_copy(dst, src, count); } template<> inline void v_convert(double *const R__ dst, const double *const R__ src, const int count) { v_copy(dst, src, count); } #if defined HAVE_IPP template<> inline void v_convert(double *const R__ dst, const float *const R__ src, const int count) { ippsConvert_32f64f(src, dst, count); } template<> inline void v_convert(float *const R__ dst, const double *const R__ src, const int count) { ippsConvert_64f32f(src, dst, count); } #elif defined HAVE_VDSP template<> inline void v_convert(double *const R__ dst, const float *const R__ src, const int count) { vDSP_vspdp((float *)src, 1, dst, 1, count); } template<> inline void v_convert(float *const R__ dst, const double *const R__ src, const int count) { vDSP_vdpsp((double *)src, 1, dst, 1, count); } #endif template<typename T, typename U> inline void v_convert_channels(U *const R__ *const R__ dst, const T *const R__ *const R__ src, const int channels, const int count) { for (int c = 0; c < channels; ++c) { v_convert(dst[c], src[c], count); } } template<typename T> inline void v_add(T *const R__ dst, const T *const R__ src, const int count) { for (int i = 0; i < count; ++i) { dst[i] += src[i]; } } template<typename T> inline void v_add(T *const R__ dst, const T value, const int count) { for (int i = 0; i < count; ++i) { dst[i] += value; } } #if defined HAVE_IPP template<> inline void v_add(float *const R__ dst, const float *const R__ src, const int count) { ippsAdd_32f_I(src, dst, count); } inline void v_add(double *const R__ dst, const double *const R__ src, const int count) { ippsAdd_64f_I(src, dst, count); } #endif template<typename T> inline void v_add_channels(T *const R__ *const R__ dst, const T *const R__ *const R__ src, const int channels, const int count) { for (int c = 0; c < channels; ++c) { v_add(dst[c], src[c], count); } } template<typename T, typename G> inline void v_add_with_gain(T *const R__ dst, const T *const R__ src, const int count, const G gain) { for (int i = 0; i < count; ++i) { dst[i] += src[i] * gain; } } template<typename T, typename G> inline void v_add_channels_with_gain(T *const R__ *const R__ dst, const T *const R__ *const R__ src, const int channels, const int count, const G gain) { for (int c = 0; c < channels; ++c) { v_add_with_gain(dst[c], src[c], count, gain); } } template<typename T> inline void v_subtract(T *const R__ dst, const T *const R__ src, const int count) { for (int i = 0; i < count; ++i) { dst[i] -= src[i]; } } #if defined HAVE_IPP template<> inline void v_subtract(float *const R__ dst, const float *const R__ src, const int count) { ippsSub_32f_I(src, dst, count); } inline void v_subtract(double *const R__ dst, const double *const R__ src, const int count) { ippsSub_64f_I(src, dst, count); } #endif template<typename T, typename G> inline void v_scale(T *const R__ dst, const G gain, const int count) { for (int i = 0; i < count; ++i) { dst[i] *= gain; } } #if defined HAVE_IPP template<> inline void v_scale(float *const R__ dst, const float gain, const int count) { ippsMulC_32f_I(gain, dst, count); } template<> inline void v_scale(double *const R__ dst, const double gain, const int count) { ippsMulC_64f_I(gain, dst, count); } #endif template<typename T> inline void v_multiply(T *const R__ dst, const T *const R__ src, const int count) { for (int i = 0; i < count; ++i) { dst[i] *= src[i]; } } #if defined HAVE_IPP template<> inline void v_multiply(float *const R__ dst, const float *const R__ src, const int count) { ippsMul_32f_I(src, dst, count); } template<> inline void v_multiply(double *const R__ dst, const double *const R__ src, const int count) { ippsMul_64f_I(src, dst, count); } #endif template<typename T> inline void v_multiply(T *const R__ dst, const T *const R__ src1, const T *const R__ src2, const int count) { for (int i = 0; i < count; ++i) { dst[i] = src1[i] * src2[i]; } } template<typename T> inline void v_divide(T *const R__ dst, const T *const R__ src, const int count) { for (int i = 0; i < count; ++i) { dst[i] /= src[i]; } } #if defined HAVE_IPP template<> inline void v_divide(float *const R__ dst, const float *const R__ src, const int count) { ippsDiv_32f_I(src, dst, count); } template<> inline void v_divide(double *const R__ dst, const double *const R__ src, const int count) { ippsDiv_64f_I(src, dst, count); } #endif #if defined HAVE_IPP template<> inline void v_multiply(float *const R__ dst, const float *const R__ src1, const float *const R__ src2, const int count) { ippsMul_32f(src1, src2, dst, count); } template<> inline void v_multiply(double *const R__ dst, const double *const R__ src1, const double *const R__ src2, const int count) { ippsMul_64f(src1, src2, dst, count); } #endif template<typename T> inline void v_multiply_and_add(T *const R__ dst, const T *const R__ src1, const T *const R__ src2, const int count) { for (int i = 0; i < count; ++i) { dst[i] += src1[i] * src2[i]; } } #if defined HAVE_IPP template<> inline void v_multiply_and_add(float *const R__ dst, const float *const R__ src1, const float *const R__ src2, const int count) { ippsAddProduct_32f(src1, src2, dst, count); } template<> inline void v_multiply_and_add(double *const R__ dst, const double *const R__ src1, const double *const R__ src2, const int count) { ippsAddProduct_64f(src1, src2, dst, count); } #endif template<typename T> inline T v_sum(const T *const R__ src, const int count) { T result = T(); for (int i = 0; i < count; ++i) { result += src[i]; } return result; } template<typename T> inline void v_log(T *const R__ dst, const int count) { for (int i = 0; i < count; ++i) { dst[i] = log(dst[i]); } } #if defined HAVE_IPP template<> inline void v_log(float *const R__ dst, const int count) { ippsLn_32f_I(dst, count); } template<> inline void v_log(double *const R__ dst, const int count) { ippsLn_64f_I(dst, count); } #elif defined HAVE_VDSP // no in-place vForce functions for these -- can we use the // out-of-place functions with equal input and output vectors? can we // use an out-of-place one with temporary buffer and still be faster // than doing it any other way? template<> inline void v_log(float *const R__ dst, const int count) { float tmp[count]; vvlogf(tmp, dst, &count); v_copy(dst, tmp, count); } template<> inline void v_log(double *const R__ dst, const int count) { double tmp[count]; vvlog(tmp, dst, &count); v_copy(dst, tmp, count); } #endif template<typename T> inline void v_exp(T *const R__ dst, const int count) { for (int i = 0; i < count; ++i) { dst[i] = exp(dst[i]); } } #if defined HAVE_IPP template<> inline void v_exp(float *const R__ dst, const int count) { ippsExp_32f_I(dst, count); } template<> inline void v_exp(double *const R__ dst, const int count) { ippsExp_64f_I(dst, count); } #elif defined HAVE_VDSP // no in-place vForce functions for these -- can we use the // out-of-place functions with equal input and output vectors? can we // use an out-of-place one with temporary buffer and still be faster // than doing it any other way? template<> inline void v_exp(float *const R__ dst, const int count) { float tmp[count]; vvexpf(tmp, dst, &count); v_copy(dst, tmp, count); } template<> inline void v_exp(double *const R__ dst, const int count) { double tmp[count]; vvexp(tmp, dst, &count); v_copy(dst, tmp, count); } #endif template<typename T> inline void v_sqrt(T *const R__ dst, const int count) { for (int i = 0; i < count; ++i) { dst[i] = sqrt(dst[i]); } } #if defined HAVE_IPP template<> inline void v_sqrt(float *const R__ dst, const int count) { ippsSqrt_32f_I(dst, count); } template<> inline void v_sqrt(double *const R__ dst, const int count) { ippsSqrt_64f_I(dst, count); } #elif defined HAVE_VDSP // no in-place vForce functions for these -- can we use the // out-of-place functions with equal input and output vectors? can we // use an out-of-place one with temporary buffer and still be faster // than doing it any other way? template<> inline void v_sqrt(float *const R__ dst, const int count) { float tmp[count]; vvsqrtf(tmp, dst, &count); v_copy(dst, tmp, count); } template<> inline void v_sqrt(double *const R__ dst, const int count) { double tmp[count]; vvsqrt(tmp, dst, &count); v_copy(dst, tmp, count); } #endif template<typename T> inline void v_square(T *const R__ dst, const int count) { for (int i = 0; i < count; ++i) { dst[i] = dst[i] * dst[i]; } } #if defined HAVE_IPP template<> inline void v_square(float *const R__ dst, const int count) { ippsSqr_32f_I(dst, count); } template<> inline void v_square(double *const R__ dst, const int count) { ippsSqr_64f_I(dst, count); } #endif template<typename T> inline void v_abs(T *const R__ dst, const int count) { for (int i = 0; i < count; ++i) { dst[i] = fabs(dst[i]); } } #if defined HAVE_IPP template<> inline void v_abs(float *const R__ dst, const int count) { ippsAbs_32f_I(dst, count); } template<> inline void v_abs(double *const R__ dst, const int count) { ippsAbs_64f_I(dst, count); } #elif defined HAVE_VDSP template<> inline void v_abs(float *const R__ dst, const int count) { float tmp[count]; #if (MACOSX_DEPLOYMENT_TARGET <= 1070 && MAC_OS_X_VERSION_MIN_REQUIRED <= 1070) vvfabf(tmp, dst, &count); #else vvfabsf(tmp, dst, &count); #endif v_copy(dst, tmp, count); } #endif template<typename T> inline void v_interleave(T *const R__ dst, const T *const R__ *const R__ src, const int channels, const int count) { int idx = 0; switch (channels) { case 2: // common case, may be vectorized by compiler if hardcoded for (int i = 0; i < count; ++i) { for (int j = 0; j < 2; ++j) { dst[idx++] = src[j][i]; } } return; case 1: v_copy(dst, src[0], count); return; default: for (int i = 0; i < count; ++i) { for (int j = 0; j < channels; ++j) { dst[idx++] = src[j][i]; } } } } #if defined HAVE_IPP template<> inline void v_interleave(float *const R__ dst, const float *const R__ *const R__ src, const int channels, const int count) { ippsInterleave_32f((const Ipp32f **)src, channels, count, dst); } // IPP does not (currently?) provide double-precision interleave #endif template<typename T> inline void v_deinterleave(T *const R__ *const R__ dst, const T *const R__ src, const int channels, const int count) { int idx = 0; switch (channels) { case 2: // common case, may be vectorized by compiler if hardcoded for (int i = 0; i < count; ++i) { for (int j = 0; j < 2; ++j) { dst[j][i] = src[idx++]; } } return; case 1: v_copy(dst[0], src, count); return; default: for (int i = 0; i < count; ++i) { for (int j = 0; j < channels; ++j) { dst[j][i] = src[idx++]; } } } } #if defined HAVE_IPP template<> inline void v_deinterleave(float *const R__ *const R__ dst, const float *const R__ src, const int channels, const int count) { ippsDeinterleave_32f((const Ipp32f *)src, channels, count, (Ipp32f **)dst); } // IPP does not (currently?) provide double-precision deinterleave #endif template<typename T> inline void v_fftshift(T *const R__ ptr, const int count) { const int hs = count/2; for (int i = 0; i < hs; ++i) { T t = ptr[i]; ptr[i] = ptr[i + hs]; ptr[i + hs] = t; } } template<typename T> inline T v_mean(const T *const R__ ptr, const int count) { T t = T(0); for (int i = 0; i < count; ++i) { t += ptr[i]; } t /= T(count); return t; } template<typename T> inline T v_mean_channels(const T *const R__ *const R__ ptr, const int channels, const int count) { T t = T(0); for (int c = 0; c < channels; ++c) { t += v_mean(ptr[c], count); } t /= T(channels); return t; } } #endif