beaglert: projects/heavy/envelopeTrigger/SignalBiquad.h annotate

annotate projects/heavy/envelopeTrigger/SignalBiquad.h @ 162:c3e8226a5651 heavy-updated

- added additional flags to C rules (-DNDEBUG, -mfpu=neon) - sample-accurate envelope triggering pd/heavy example

author	chnrx <chris.heinrichs@gmail.com>
date	Thu, 12 Nov 2015 14:59:46 +0000
parents
children

rev	line source
chris@162	1 /**
chris@162	2 * Copyright (c) 2014, 2015, Enzien Audio Ltd.
chris@162	3 *
chris@162	4 * Permission to use, copy, modify, and/or distribute this software for any
chris@162	5 * purpose with or without fee is hereby granted, provided that the above
chris@162	6 * copyright notice and this permission notice appear in all copies.
chris@162	7 *
chris@162	8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
chris@162	9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
chris@162	10 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
chris@162	11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
chris@162	12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
chris@162	13 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
chris@162	14 * PERFORMANCE OF THIS SOFTWARE.
chris@162	15 */
chris@162	16
chris@162	17 #ifndef _HEAVY_SIGNAL_BIQUAD_H_
chris@162	18 #define _HEAVY_SIGNAL_BIQUAD_H_
chris@162	19
chris@162	20 #include "HvBase.h"
chris@162	21
chris@162	22 // http://en.wikipedia.org/wiki/Digital_biquad_filter
chris@162	23 typedef struct SignalBiquad {
chris@162	24 #if HV_SIMD_AVX
chris@162	25 __m256 xm1;
chris@162	26 __m256 xm2;
chris@162	27 #elif HV_SIMD_SSE
chris@162	28 __m128 xm1;
chris@162	29 __m128 xm2;
chris@162	30 #elif HV_SIMD_NEON
chris@162	31 float32x4_t xm1;
chris@162	32 float32x4_t xm2;
chris@162	33 #else // HV_SIMD_NONE
chris@162	34 float x1;
chris@162	35 float x2;
chris@162	36 #endif
chris@162	37 float y1;
chris@162	38 float y2;
chris@162	39 } SignalBiquad;
chris@162	40
chris@162	41 hv_size_t sBiquad_init(SignalBiquad *o);
chris@162	42
chris@162	43 void __hv_biquad_f(SignalBiquad *o,
chris@162	44 hv_bInf_t bIn, hv_bInf_t bX0, hv_bInf_t bX1, hv_bInf_t bX2, hv_bInf_t bY1, hv_bInf_t bY2,
chris@162	45 hv_bOutf_t bOut);
chris@162	46
chris@162	47 typedef struct SignalBiquad_k {
chris@162	48 #if HV_SIMD_AVX \|\| HV_SIMD_SSE
chris@162	49 // preprocessed filter coefficients
chris@162	50 __m128 coeff_xp3;
chris@162	51 __m128 coeff_xp2;
chris@162	52 __m128 coeff_xp1;
chris@162	53 __m128 coeff_x0;
chris@162	54 __m128 coeff_xm1;
chris@162	55 __m128 coeff_xm2;
chris@162	56 __m128 coeff_ym1;
chris@162	57 __m128 coeff_ym2;
chris@162	58
chris@162	59 // filter state
chris@162	60 __m128 xm1;
chris@162	61 __m128 xm2;
chris@162	62 __m128 ym1;
chris@162	63 __m128 ym2;
chris@162	64 #elif HV_SIMD_NEON
chris@162	65 float32x4_t coeff_xp3;
chris@162	66 float32x4_t coeff_xp2;
chris@162	67 float32x4_t coeff_xp1;
chris@162	68 float32x4_t coeff_x0;
chris@162	69 float32x4_t coeff_xm1;
chris@162	70 float32x4_t coeff_xm2;
chris@162	71 float32x4_t coeff_ym1;
chris@162	72 float32x4_t coeff_ym2;
chris@162	73 float32x4_t xm1;
chris@162	74 float32x4_t xm2;
chris@162	75 float32x4_t ym1;
chris@162	76 float32x4_t ym2;
chris@162	77 #else // HV_SIMD_NONE
chris@162	78 float xm1;
chris@162	79 float xm2;
chris@162	80 float ym1;
chris@162	81 float ym2;
chris@162	82 #endif
chris@162	83 // original filter coefficients
chris@162	84 float b0; // x[0]
chris@162	85 float b1; // x[-1]
chris@162	86 float b2; // x[-2]
chris@162	87 float a1; // y[-1]
chris@162	88 float a2; // y[-2]
chris@162	89 } SignalBiquad_k;
chris@162	90
chris@162	91 hv_size_t sBiquad_k_init(SignalBiquad_k *o, float x0, float x1, float x2, float y1, float y2);
chris@162	92
chris@162	93 void sBiquad_k_onMessage(SignalBiquad_k o, int letIn, const HvMessage const m);
chris@162	94
chris@162	95 static inline void __hv_biquad_k_f(SignalBiquad_k *o, hv_bInf_t bIn, hv_bOutf_t bOut) {
chris@162	96 #if HV_SIMD_AVX
chris@162	97 const __m128 c_xp3 = o->coeff_xp3;
chris@162	98 const __m128 c_xp2 = o->coeff_xp2;
chris@162	99 const __m128 c_xp1 = o->coeff_xp1;
chris@162	100 const __m128 c_x0 = o->coeff_x0;
chris@162	101 const __m128 c_xm1 = o->coeff_xm1;
chris@162	102 const __m128 c_xm2 = o->coeff_xm2;
chris@162	103 const __m128 c_ym1 = o->coeff_ym1;
chris@162	104 const __m128 c_ym2 = o->coeff_ym2;
chris@162	105
chris@162	106 // lower half
chris@162	107 __m128 x3 = _mm_set1_ps(bIn[3]);
chris@162	108 __m128 x2 = _mm_set1_ps(bIn[2]);
chris@162	109 __m128 x1 = _mm_set1_ps(bIn[1]);
chris@162	110 __m128 x0 = _mm_set1_ps(bIn[0]);
chris@162	111 __m128 xm1 = o->xm1;
chris@162	112 __m128 xm2 = o->xm2;
chris@162	113 __m128 ym1 = o->ym1;
chris@162	114 __m128 ym2 = o->ym2;
chris@162	115
chris@162	116 __m128 a = _mm_mul_ps(c_xp3, x3);
chris@162	117 __m128 b = _mm_mul_ps(c_xp2, x2);
chris@162	118 __m128 c = _mm_mul_ps(c_xp1, x1);
chris@162	119 __m128 d = _mm_mul_ps(c_x0, x0);
chris@162	120 __m128 e = _mm_mul_ps(c_xm1, xm1);
chris@162	121 __m128 f = _mm_mul_ps(c_xm2, xm2);
chris@162	122 __m128 g = _mm_mul_ps(c_ym1, ym1);
chris@162	123 __m128 h = _mm_mul_ps(c_ym2, ym2);
chris@162	124
chris@162	125 __m128 i = _mm_add_ps(a, b);
chris@162	126 __m128 j = _mm_add_ps(c, d);
chris@162	127 __m128 k = _mm_add_ps(e, f);
chris@162	128 __m128 l = _mm_add_ps(g, h);
chris@162	129 __m128 m = _mm_add_ps(i, j);
chris@162	130 __m128 n = _mm_add_ps(k, l);
chris@162	131
chris@162	132 __m128 lo_y = _mm_add_ps(m, n); // lower part of output buffer
chris@162	133
chris@162	134 // upper half
chris@162	135 xm1 = x3;
chris@162	136 xm2 = x2;
chris@162	137 x3 = _mm_set1_ps(bIn[7]);
chris@162	138 x2 = _mm_set1_ps(bIn[6]);
chris@162	139 x1 = _mm_set1_ps(bIn[5]);
chris@162	140 x0 = _mm_set1_ps(bIn[4]);
chris@162	141 ym1 = _mm_set1_ps(lo_y[3]);
chris@162	142 ym2 = _mm_set1_ps(lo_y[2]);
chris@162	143
chris@162	144 a = _mm_mul_ps(c_xp3, x3);
chris@162	145 b = _mm_mul_ps(c_xp2, x2);
chris@162	146 c = _mm_mul_ps(c_xp1, x1);
chris@162	147 d = _mm_mul_ps(c_x0, x0);
chris@162	148 e = _mm_mul_ps(c_xm1, xm1);
chris@162	149 f = _mm_mul_ps(c_xm2, xm2);
chris@162	150 g = _mm_mul_ps(c_ym1, ym1);
chris@162	151 h = _mm_mul_ps(c_ym2, ym2);
chris@162	152
chris@162	153 i = _mm_add_ps(a, b);
chris@162	154 j = _mm_add_ps(c, d);
chris@162	155 k = _mm_add_ps(e, f);
chris@162	156 l = _mm_add_ps(g, h);
chris@162	157 m = _mm_add_ps(i, j);
chris@162	158 n = _mm_add_ps(k, l);
chris@162	159
chris@162	160 __m128 up_y = _mm_add_ps(m, n); // upper part of output buffer
chris@162	161
chris@162	162 o->xm1 = x3;
chris@162	163 o->xm2 = x2;
chris@162	164 o->ym1 = _mm_set1_ps(up_y[3]);
chris@162	165 o->ym2 = _mm_set1_ps(up_y[2]);
chris@162	166
chris@162	167 *bOut = _mm256_insertf128_ps(_mm256_castps128_ps256(lo_y), up_y, 1);
chris@162	168 #elif HV_SIMD_SSE
chris@162	169 __m128 x3 = _mm_set1_ps(bIn[3]);
chris@162	170 __m128 x2 = _mm_set1_ps(bIn[2]);
chris@162	171 __m128 x1 = _mm_set1_ps(bIn[1]);
chris@162	172 __m128 x0 = _mm_set1_ps(bIn[0]);
chris@162	173
chris@162	174 __m128 a = _mm_mul_ps(o->coeff_xp3, x3);
chris@162	175 __m128 b = _mm_mul_ps(o->coeff_xp2, x2);
chris@162	176 __m128 c = _mm_mul_ps(o->coeff_xp1, x1);
chris@162	177 __m128 d = _mm_mul_ps(o->coeff_x0, x0);
chris@162	178 __m128 e = _mm_mul_ps(o->coeff_xm1, o->xm1);
chris@162	179 __m128 f = _mm_mul_ps(o->coeff_xm2, o->xm2);
chris@162	180 __m128 g = _mm_mul_ps(o->coeff_ym1, o->ym1);
chris@162	181 __m128 h = _mm_mul_ps(o->coeff_ym2, o->ym2);
chris@162	182 __m128 i = _mm_add_ps(a, b);
chris@162	183 __m128 j = _mm_add_ps(c, d);
chris@162	184 __m128 k = _mm_add_ps(e, f);
chris@162	185 __m128 l = _mm_add_ps(g, h);
chris@162	186 __m128 m = _mm_add_ps(i, j);
chris@162	187 __m128 n = _mm_add_ps(k, l);
chris@162	188
chris@162	189 __m128 y = _mm_add_ps(m, n);
chris@162	190
chris@162	191 o->xm1 = x3;
chris@162	192 o->xm2 = x2;
chris@162	193 o->ym1 = _mm_set1_ps(y[3]);
chris@162	194 o->ym2 = _mm_set1_ps(y[2]);
chris@162	195
chris@162	196 *bOut = y;
chris@162	197 #elif HV_SIMD_NEON
chris@162	198 float32x4_t x3 = vdupq_n_f32(bIn[3]);
chris@162	199 float32x4_t x2 = vdupq_n_f32(bIn[2]);
chris@162	200 float32x4_t x1 = vdupq_n_f32(bIn[1]);
chris@162	201 float32x4_t x0 = vdupq_n_f32(bIn[0]);
chris@162	202
chris@162	203 float32x4_t a = vmulq_f32(o->coeff_xp3, x3);
chris@162	204 float32x4_t b = vmulq_f32(o->coeff_xp2, x2);
chris@162	205 float32x4_t c = vmulq_f32(o->coeff_xp1, x1);
chris@162	206 float32x4_t d = vmulq_f32(o->coeff_x0, x0);
chris@162	207 float32x4_t e = vmulq_f32(o->coeff_xm1, o->xm1);
chris@162	208 float32x4_t f = vmulq_f32(o->coeff_xm2, o->xm2);
chris@162	209 float32x4_t g = vmulq_f32(o->coeff_ym1, o->ym1);
chris@162	210 float32x4_t h = vmulq_f32(o->coeff_ym2, o->ym2);
chris@162	211 float32x4_t i = vaddq_f32(a, b);
chris@162	212 float32x4_t j = vaddq_f32(c, d);
chris@162	213 float32x4_t k = vaddq_f32(e, f);
chris@162	214 float32x4_t l = vaddq_f32(g, h);
chris@162	215 float32x4_t m = vaddq_f32(i, j);
chris@162	216 float32x4_t n = vaddq_f32(k, l);
chris@162	217 float32x4_t y = vaddq_f32(m, n);
chris@162	218
chris@162	219 o->xm1 = x3;
chris@162	220 o->xm2 = x2;
chris@162	221 o->ym1 = vdupq_n_f32(y[3]);
chris@162	222 o->ym2 = vdupq_n_f32(y[2]);
chris@162	223
chris@162	224 *bOut = y;
chris@162	225 #else // HV_SIMD_NONE
chris@162	226 float y = o->b0bIn + o->b1o->xm1 + o->b2o->xm2 - o->a1o->ym1 - o->a2*o->ym2;
chris@162	227 o->xm2 = o->xm1;
chris@162	228 o->xm1 = bIn;
chris@162	229 o->ym2 = o->ym1;
chris@162	230 o->ym1 = y;
chris@162	231 *bOut = y;
chris@162	232 #endif
chris@162	233 }
chris@162	234
chris@162	235 #endif // _HEAVY_SIGNAL_BIQUAD_H_

Mercurial > hg > beaglert

annotate projects/heavy/envelopeTrigger/SignalBiquad.h @ 162:c3e8226a5651 heavy-updated