chris@162: /** chris@162: * Copyright (c) 2014, 2015, Enzien Audio Ltd. chris@162: * chris@162: * Permission to use, copy, modify, and/or distribute this software for any chris@162: * purpose with or without fee is hereby granted, provided that the above chris@162: * copyright notice and this permission notice appear in all copies. chris@162: * chris@162: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH chris@162: * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY chris@162: * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, chris@162: * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM chris@162: * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR chris@162: * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR chris@162: * PERFORMANCE OF THIS SOFTWARE. chris@162: */ chris@162: chris@162: #ifndef _HEAVY_SIGNAL_BIQUAD_H_ chris@162: #define _HEAVY_SIGNAL_BIQUAD_H_ chris@162: chris@162: #include "HvBase.h" chris@162: chris@162: // http://en.wikipedia.org/wiki/Digital_biquad_filter chris@162: typedef struct SignalBiquad { chris@162: #if HV_SIMD_AVX chris@162: __m256 xm1; chris@162: __m256 xm2; chris@162: #elif HV_SIMD_SSE chris@162: __m128 xm1; chris@162: __m128 xm2; chris@162: #elif HV_SIMD_NEON chris@162: float32x4_t xm1; chris@162: float32x4_t xm2; chris@162: #else // HV_SIMD_NONE chris@162: float x1; chris@162: float x2; chris@162: #endif chris@162: float y1; chris@162: float y2; chris@162: } SignalBiquad; chris@162: chris@162: hv_size_t sBiquad_init(SignalBiquad *o); chris@162: chris@162: void __hv_biquad_f(SignalBiquad *o, chris@162: hv_bInf_t bIn, hv_bInf_t bX0, hv_bInf_t bX1, hv_bInf_t bX2, hv_bInf_t bY1, hv_bInf_t bY2, chris@162: hv_bOutf_t bOut); chris@162: chris@162: typedef struct SignalBiquad_k { chris@162: #if HV_SIMD_AVX || HV_SIMD_SSE chris@162: // preprocessed filter coefficients chris@162: __m128 coeff_xp3; chris@162: __m128 coeff_xp2; chris@162: __m128 coeff_xp1; chris@162: __m128 coeff_x0; chris@162: __m128 coeff_xm1; chris@162: __m128 coeff_xm2; chris@162: __m128 coeff_ym1; chris@162: __m128 coeff_ym2; chris@162: chris@162: // filter state chris@162: __m128 xm1; chris@162: __m128 xm2; chris@162: __m128 ym1; chris@162: __m128 ym2; chris@162: #elif HV_SIMD_NEON chris@162: float32x4_t coeff_xp3; chris@162: float32x4_t coeff_xp2; chris@162: float32x4_t coeff_xp1; chris@162: float32x4_t coeff_x0; chris@162: float32x4_t coeff_xm1; chris@162: float32x4_t coeff_xm2; chris@162: float32x4_t coeff_ym1; chris@162: float32x4_t coeff_ym2; chris@162: float32x4_t xm1; chris@162: float32x4_t xm2; chris@162: float32x4_t ym1; chris@162: float32x4_t ym2; chris@162: #else // HV_SIMD_NONE chris@162: float xm1; chris@162: float xm2; chris@162: float ym1; chris@162: float ym2; chris@162: #endif chris@162: // original filter coefficients chris@162: float b0; // x[0] chris@162: float b1; // x[-1] chris@162: float b2; // x[-2] chris@162: float a1; // y[-1] chris@162: float a2; // y[-2] chris@162: } SignalBiquad_k; chris@162: chris@162: hv_size_t sBiquad_k_init(SignalBiquad_k *o, float x0, float x1, float x2, float y1, float y2); chris@162: chris@162: void sBiquad_k_onMessage(SignalBiquad_k *o, int letIn, const HvMessage *const m); chris@162: chris@162: static inline void __hv_biquad_k_f(SignalBiquad_k *o, hv_bInf_t bIn, hv_bOutf_t bOut) { chris@162: #if HV_SIMD_AVX chris@162: const __m128 c_xp3 = o->coeff_xp3; chris@162: const __m128 c_xp2 = o->coeff_xp2; chris@162: const __m128 c_xp1 = o->coeff_xp1; chris@162: const __m128 c_x0 = o->coeff_x0; chris@162: const __m128 c_xm1 = o->coeff_xm1; chris@162: const __m128 c_xm2 = o->coeff_xm2; chris@162: const __m128 c_ym1 = o->coeff_ym1; chris@162: const __m128 c_ym2 = o->coeff_ym2; chris@162: chris@162: // lower half chris@162: __m128 x3 = _mm_set1_ps(bIn[3]); chris@162: __m128 x2 = _mm_set1_ps(bIn[2]); chris@162: __m128 x1 = _mm_set1_ps(bIn[1]); chris@162: __m128 x0 = _mm_set1_ps(bIn[0]); chris@162: __m128 xm1 = o->xm1; chris@162: __m128 xm2 = o->xm2; chris@162: __m128 ym1 = o->ym1; chris@162: __m128 ym2 = o->ym2; chris@162: chris@162: __m128 a = _mm_mul_ps(c_xp3, x3); chris@162: __m128 b = _mm_mul_ps(c_xp2, x2); chris@162: __m128 c = _mm_mul_ps(c_xp1, x1); chris@162: __m128 d = _mm_mul_ps(c_x0, x0); chris@162: __m128 e = _mm_mul_ps(c_xm1, xm1); chris@162: __m128 f = _mm_mul_ps(c_xm2, xm2); chris@162: __m128 g = _mm_mul_ps(c_ym1, ym1); chris@162: __m128 h = _mm_mul_ps(c_ym2, ym2); chris@162: chris@162: __m128 i = _mm_add_ps(a, b); chris@162: __m128 j = _mm_add_ps(c, d); chris@162: __m128 k = _mm_add_ps(e, f); chris@162: __m128 l = _mm_add_ps(g, h); chris@162: __m128 m = _mm_add_ps(i, j); chris@162: __m128 n = _mm_add_ps(k, l); chris@162: chris@162: __m128 lo_y = _mm_add_ps(m, n); // lower part of output buffer chris@162: chris@162: // upper half chris@162: xm1 = x3; chris@162: xm2 = x2; chris@162: x3 = _mm_set1_ps(bIn[7]); chris@162: x2 = _mm_set1_ps(bIn[6]); chris@162: x1 = _mm_set1_ps(bIn[5]); chris@162: x0 = _mm_set1_ps(bIn[4]); chris@162: ym1 = _mm_set1_ps(lo_y[3]); chris@162: ym2 = _mm_set1_ps(lo_y[2]); chris@162: chris@162: a = _mm_mul_ps(c_xp3, x3); chris@162: b = _mm_mul_ps(c_xp2, x2); chris@162: c = _mm_mul_ps(c_xp1, x1); chris@162: d = _mm_mul_ps(c_x0, x0); chris@162: e = _mm_mul_ps(c_xm1, xm1); chris@162: f = _mm_mul_ps(c_xm2, xm2); chris@162: g = _mm_mul_ps(c_ym1, ym1); chris@162: h = _mm_mul_ps(c_ym2, ym2); chris@162: chris@162: i = _mm_add_ps(a, b); chris@162: j = _mm_add_ps(c, d); chris@162: k = _mm_add_ps(e, f); chris@162: l = _mm_add_ps(g, h); chris@162: m = _mm_add_ps(i, j); chris@162: n = _mm_add_ps(k, l); chris@162: chris@162: __m128 up_y = _mm_add_ps(m, n); // upper part of output buffer chris@162: chris@162: o->xm1 = x3; chris@162: o->xm2 = x2; chris@162: o->ym1 = _mm_set1_ps(up_y[3]); chris@162: o->ym2 = _mm_set1_ps(up_y[2]); chris@162: chris@162: *bOut = _mm256_insertf128_ps(_mm256_castps128_ps256(lo_y), up_y, 1); chris@162: #elif HV_SIMD_SSE chris@162: __m128 x3 = _mm_set1_ps(bIn[3]); chris@162: __m128 x2 = _mm_set1_ps(bIn[2]); chris@162: __m128 x1 = _mm_set1_ps(bIn[1]); chris@162: __m128 x0 = _mm_set1_ps(bIn[0]); chris@162: chris@162: __m128 a = _mm_mul_ps(o->coeff_xp3, x3); chris@162: __m128 b = _mm_mul_ps(o->coeff_xp2, x2); chris@162: __m128 c = _mm_mul_ps(o->coeff_xp1, x1); chris@162: __m128 d = _mm_mul_ps(o->coeff_x0, x0); chris@162: __m128 e = _mm_mul_ps(o->coeff_xm1, o->xm1); chris@162: __m128 f = _mm_mul_ps(o->coeff_xm2, o->xm2); chris@162: __m128 g = _mm_mul_ps(o->coeff_ym1, o->ym1); chris@162: __m128 h = _mm_mul_ps(o->coeff_ym2, o->ym2); chris@162: __m128 i = _mm_add_ps(a, b); chris@162: __m128 j = _mm_add_ps(c, d); chris@162: __m128 k = _mm_add_ps(e, f); chris@162: __m128 l = _mm_add_ps(g, h); chris@162: __m128 m = _mm_add_ps(i, j); chris@162: __m128 n = _mm_add_ps(k, l); chris@162: chris@162: __m128 y = _mm_add_ps(m, n); chris@162: chris@162: o->xm1 = x3; chris@162: o->xm2 = x2; chris@162: o->ym1 = _mm_set1_ps(y[3]); chris@162: o->ym2 = _mm_set1_ps(y[2]); chris@162: chris@162: *bOut = y; chris@162: #elif HV_SIMD_NEON chris@162: float32x4_t x3 = vdupq_n_f32(bIn[3]); chris@162: float32x4_t x2 = vdupq_n_f32(bIn[2]); chris@162: float32x4_t x1 = vdupq_n_f32(bIn[1]); chris@162: float32x4_t x0 = vdupq_n_f32(bIn[0]); chris@162: chris@162: float32x4_t a = vmulq_f32(o->coeff_xp3, x3); chris@162: float32x4_t b = vmulq_f32(o->coeff_xp2, x2); chris@162: float32x4_t c = vmulq_f32(o->coeff_xp1, x1); chris@162: float32x4_t d = vmulq_f32(o->coeff_x0, x0); chris@162: float32x4_t e = vmulq_f32(o->coeff_xm1, o->xm1); chris@162: float32x4_t f = vmulq_f32(o->coeff_xm2, o->xm2); chris@162: float32x4_t g = vmulq_f32(o->coeff_ym1, o->ym1); chris@162: float32x4_t h = vmulq_f32(o->coeff_ym2, o->ym2); chris@162: float32x4_t i = vaddq_f32(a, b); chris@162: float32x4_t j = vaddq_f32(c, d); chris@162: float32x4_t k = vaddq_f32(e, f); chris@162: float32x4_t l = vaddq_f32(g, h); chris@162: float32x4_t m = vaddq_f32(i, j); chris@162: float32x4_t n = vaddq_f32(k, l); chris@162: float32x4_t y = vaddq_f32(m, n); chris@162: chris@162: o->xm1 = x3; chris@162: o->xm2 = x2; chris@162: o->ym1 = vdupq_n_f32(y[3]); chris@162: o->ym2 = vdupq_n_f32(y[2]); chris@162: chris@162: *bOut = y; chris@162: #else // HV_SIMD_NONE chris@162: float y = o->b0*bIn + o->b1*o->xm1 + o->b2*o->xm2 - o->a1*o->ym1 - o->a2*o->ym2; chris@162: o->xm2 = o->xm1; chris@162: o->xm1 = bIn; chris@162: o->ym2 = o->ym1; chris@162: o->ym1 = y; chris@162: *bOut = y; chris@162: #endif chris@162: } chris@162: chris@162: #endif // _HEAVY_SIGNAL_BIQUAD_H_