chris@162
|
1 /**
|
chris@162
|
2 * Copyright (c) 2014, 2015, Enzien Audio Ltd.
|
chris@162
|
3 *
|
chris@162
|
4 * Permission to use, copy, modify, and/or distribute this software for any
|
chris@162
|
5 * purpose with or without fee is hereby granted, provided that the above
|
chris@162
|
6 * copyright notice and this permission notice appear in all copies.
|
chris@162
|
7 *
|
chris@162
|
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
chris@162
|
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
chris@162
|
10 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
chris@162
|
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
chris@162
|
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
chris@162
|
13 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
chris@162
|
14 * PERFORMANCE OF THIS SOFTWARE.
|
chris@162
|
15 */
|
chris@162
|
16
|
chris@162
|
17 #ifndef _HEAVY_SIGNAL_BIQUAD_H_
|
chris@162
|
18 #define _HEAVY_SIGNAL_BIQUAD_H_
|
chris@162
|
19
|
chris@162
|
20 #include "HvBase.h"
|
chris@162
|
21
|
chris@162
|
22 // http://en.wikipedia.org/wiki/Digital_biquad_filter
|
chris@162
|
23 typedef struct SignalBiquad {
|
chris@162
|
24 #if HV_SIMD_AVX
|
chris@162
|
25 __m256 xm1;
|
chris@162
|
26 __m256 xm2;
|
chris@162
|
27 #elif HV_SIMD_SSE
|
chris@162
|
28 __m128 xm1;
|
chris@162
|
29 __m128 xm2;
|
chris@162
|
30 #elif HV_SIMD_NEON
|
chris@162
|
31 float32x4_t xm1;
|
chris@162
|
32 float32x4_t xm2;
|
chris@162
|
33 #else // HV_SIMD_NONE
|
chris@162
|
34 float x1;
|
chris@162
|
35 float x2;
|
chris@162
|
36 #endif
|
chris@162
|
37 float y1;
|
chris@162
|
38 float y2;
|
chris@162
|
39 } SignalBiquad;
|
chris@162
|
40
|
chris@162
|
41 hv_size_t sBiquad_init(SignalBiquad *o);
|
chris@162
|
42
|
chris@162
|
43 void __hv_biquad_f(SignalBiquad *o,
|
chris@162
|
44 hv_bInf_t bIn, hv_bInf_t bX0, hv_bInf_t bX1, hv_bInf_t bX2, hv_bInf_t bY1, hv_bInf_t bY2,
|
chris@162
|
45 hv_bOutf_t bOut);
|
chris@162
|
46
|
chris@162
|
47 typedef struct SignalBiquad_k {
|
chris@162
|
48 #if HV_SIMD_AVX || HV_SIMD_SSE
|
chris@162
|
49 // preprocessed filter coefficients
|
chris@162
|
50 __m128 coeff_xp3;
|
chris@162
|
51 __m128 coeff_xp2;
|
chris@162
|
52 __m128 coeff_xp1;
|
chris@162
|
53 __m128 coeff_x0;
|
chris@162
|
54 __m128 coeff_xm1;
|
chris@162
|
55 __m128 coeff_xm2;
|
chris@162
|
56 __m128 coeff_ym1;
|
chris@162
|
57 __m128 coeff_ym2;
|
chris@162
|
58
|
chris@162
|
59 // filter state
|
chris@162
|
60 __m128 xm1;
|
chris@162
|
61 __m128 xm2;
|
chris@162
|
62 __m128 ym1;
|
chris@162
|
63 __m128 ym2;
|
chris@162
|
64 #elif HV_SIMD_NEON
|
chris@162
|
65 float32x4_t coeff_xp3;
|
chris@162
|
66 float32x4_t coeff_xp2;
|
chris@162
|
67 float32x4_t coeff_xp1;
|
chris@162
|
68 float32x4_t coeff_x0;
|
chris@162
|
69 float32x4_t coeff_xm1;
|
chris@162
|
70 float32x4_t coeff_xm2;
|
chris@162
|
71 float32x4_t coeff_ym1;
|
chris@162
|
72 float32x4_t coeff_ym2;
|
chris@162
|
73 float32x4_t xm1;
|
chris@162
|
74 float32x4_t xm2;
|
chris@162
|
75 float32x4_t ym1;
|
chris@162
|
76 float32x4_t ym2;
|
chris@162
|
77 #else // HV_SIMD_NONE
|
chris@162
|
78 float xm1;
|
chris@162
|
79 float xm2;
|
chris@162
|
80 float ym1;
|
chris@162
|
81 float ym2;
|
chris@162
|
82 #endif
|
chris@162
|
83 // original filter coefficients
|
chris@162
|
84 float b0; // x[0]
|
chris@162
|
85 float b1; // x[-1]
|
chris@162
|
86 float b2; // x[-2]
|
chris@162
|
87 float a1; // y[-1]
|
chris@162
|
88 float a2; // y[-2]
|
chris@162
|
89 } SignalBiquad_k;
|
chris@162
|
90
|
chris@162
|
91 hv_size_t sBiquad_k_init(SignalBiquad_k *o, float x0, float x1, float x2, float y1, float y2);
|
chris@162
|
92
|
chris@162
|
93 void sBiquad_k_onMessage(SignalBiquad_k *o, int letIn, const HvMessage *const m);
|
chris@162
|
94
|
chris@162
|
95 static inline void __hv_biquad_k_f(SignalBiquad_k *o, hv_bInf_t bIn, hv_bOutf_t bOut) {
|
chris@162
|
96 #if HV_SIMD_AVX
|
chris@162
|
97 const __m128 c_xp3 = o->coeff_xp3;
|
chris@162
|
98 const __m128 c_xp2 = o->coeff_xp2;
|
chris@162
|
99 const __m128 c_xp1 = o->coeff_xp1;
|
chris@162
|
100 const __m128 c_x0 = o->coeff_x0;
|
chris@162
|
101 const __m128 c_xm1 = o->coeff_xm1;
|
chris@162
|
102 const __m128 c_xm2 = o->coeff_xm2;
|
chris@162
|
103 const __m128 c_ym1 = o->coeff_ym1;
|
chris@162
|
104 const __m128 c_ym2 = o->coeff_ym2;
|
chris@162
|
105
|
chris@162
|
106 // lower half
|
chris@162
|
107 __m128 x3 = _mm_set1_ps(bIn[3]);
|
chris@162
|
108 __m128 x2 = _mm_set1_ps(bIn[2]);
|
chris@162
|
109 __m128 x1 = _mm_set1_ps(bIn[1]);
|
chris@162
|
110 __m128 x0 = _mm_set1_ps(bIn[0]);
|
chris@162
|
111 __m128 xm1 = o->xm1;
|
chris@162
|
112 __m128 xm2 = o->xm2;
|
chris@162
|
113 __m128 ym1 = o->ym1;
|
chris@162
|
114 __m128 ym2 = o->ym2;
|
chris@162
|
115
|
chris@162
|
116 __m128 a = _mm_mul_ps(c_xp3, x3);
|
chris@162
|
117 __m128 b = _mm_mul_ps(c_xp2, x2);
|
chris@162
|
118 __m128 c = _mm_mul_ps(c_xp1, x1);
|
chris@162
|
119 __m128 d = _mm_mul_ps(c_x0, x0);
|
chris@162
|
120 __m128 e = _mm_mul_ps(c_xm1, xm1);
|
chris@162
|
121 __m128 f = _mm_mul_ps(c_xm2, xm2);
|
chris@162
|
122 __m128 g = _mm_mul_ps(c_ym1, ym1);
|
chris@162
|
123 __m128 h = _mm_mul_ps(c_ym2, ym2);
|
chris@162
|
124
|
chris@162
|
125 __m128 i = _mm_add_ps(a, b);
|
chris@162
|
126 __m128 j = _mm_add_ps(c, d);
|
chris@162
|
127 __m128 k = _mm_add_ps(e, f);
|
chris@162
|
128 __m128 l = _mm_add_ps(g, h);
|
chris@162
|
129 __m128 m = _mm_add_ps(i, j);
|
chris@162
|
130 __m128 n = _mm_add_ps(k, l);
|
chris@162
|
131
|
chris@162
|
132 __m128 lo_y = _mm_add_ps(m, n); // lower part of output buffer
|
chris@162
|
133
|
chris@162
|
134 // upper half
|
chris@162
|
135 xm1 = x3;
|
chris@162
|
136 xm2 = x2;
|
chris@162
|
137 x3 = _mm_set1_ps(bIn[7]);
|
chris@162
|
138 x2 = _mm_set1_ps(bIn[6]);
|
chris@162
|
139 x1 = _mm_set1_ps(bIn[5]);
|
chris@162
|
140 x0 = _mm_set1_ps(bIn[4]);
|
chris@162
|
141 ym1 = _mm_set1_ps(lo_y[3]);
|
chris@162
|
142 ym2 = _mm_set1_ps(lo_y[2]);
|
chris@162
|
143
|
chris@162
|
144 a = _mm_mul_ps(c_xp3, x3);
|
chris@162
|
145 b = _mm_mul_ps(c_xp2, x2);
|
chris@162
|
146 c = _mm_mul_ps(c_xp1, x1);
|
chris@162
|
147 d = _mm_mul_ps(c_x0, x0);
|
chris@162
|
148 e = _mm_mul_ps(c_xm1, xm1);
|
chris@162
|
149 f = _mm_mul_ps(c_xm2, xm2);
|
chris@162
|
150 g = _mm_mul_ps(c_ym1, ym1);
|
chris@162
|
151 h = _mm_mul_ps(c_ym2, ym2);
|
chris@162
|
152
|
chris@162
|
153 i = _mm_add_ps(a, b);
|
chris@162
|
154 j = _mm_add_ps(c, d);
|
chris@162
|
155 k = _mm_add_ps(e, f);
|
chris@162
|
156 l = _mm_add_ps(g, h);
|
chris@162
|
157 m = _mm_add_ps(i, j);
|
chris@162
|
158 n = _mm_add_ps(k, l);
|
chris@162
|
159
|
chris@162
|
160 __m128 up_y = _mm_add_ps(m, n); // upper part of output buffer
|
chris@162
|
161
|
chris@162
|
162 o->xm1 = x3;
|
chris@162
|
163 o->xm2 = x2;
|
chris@162
|
164 o->ym1 = _mm_set1_ps(up_y[3]);
|
chris@162
|
165 o->ym2 = _mm_set1_ps(up_y[2]);
|
chris@162
|
166
|
chris@162
|
167 *bOut = _mm256_insertf128_ps(_mm256_castps128_ps256(lo_y), up_y, 1);
|
chris@162
|
168 #elif HV_SIMD_SSE
|
chris@162
|
169 __m128 x3 = _mm_set1_ps(bIn[3]);
|
chris@162
|
170 __m128 x2 = _mm_set1_ps(bIn[2]);
|
chris@162
|
171 __m128 x1 = _mm_set1_ps(bIn[1]);
|
chris@162
|
172 __m128 x0 = _mm_set1_ps(bIn[0]);
|
chris@162
|
173
|
chris@162
|
174 __m128 a = _mm_mul_ps(o->coeff_xp3, x3);
|
chris@162
|
175 __m128 b = _mm_mul_ps(o->coeff_xp2, x2);
|
chris@162
|
176 __m128 c = _mm_mul_ps(o->coeff_xp1, x1);
|
chris@162
|
177 __m128 d = _mm_mul_ps(o->coeff_x0, x0);
|
chris@162
|
178 __m128 e = _mm_mul_ps(o->coeff_xm1, o->xm1);
|
chris@162
|
179 __m128 f = _mm_mul_ps(o->coeff_xm2, o->xm2);
|
chris@162
|
180 __m128 g = _mm_mul_ps(o->coeff_ym1, o->ym1);
|
chris@162
|
181 __m128 h = _mm_mul_ps(o->coeff_ym2, o->ym2);
|
chris@162
|
182 __m128 i = _mm_add_ps(a, b);
|
chris@162
|
183 __m128 j = _mm_add_ps(c, d);
|
chris@162
|
184 __m128 k = _mm_add_ps(e, f);
|
chris@162
|
185 __m128 l = _mm_add_ps(g, h);
|
chris@162
|
186 __m128 m = _mm_add_ps(i, j);
|
chris@162
|
187 __m128 n = _mm_add_ps(k, l);
|
chris@162
|
188
|
chris@162
|
189 __m128 y = _mm_add_ps(m, n);
|
chris@162
|
190
|
chris@162
|
191 o->xm1 = x3;
|
chris@162
|
192 o->xm2 = x2;
|
chris@162
|
193 o->ym1 = _mm_set1_ps(y[3]);
|
chris@162
|
194 o->ym2 = _mm_set1_ps(y[2]);
|
chris@162
|
195
|
chris@162
|
196 *bOut = y;
|
chris@162
|
197 #elif HV_SIMD_NEON
|
chris@162
|
198 float32x4_t x3 = vdupq_n_f32(bIn[3]);
|
chris@162
|
199 float32x4_t x2 = vdupq_n_f32(bIn[2]);
|
chris@162
|
200 float32x4_t x1 = vdupq_n_f32(bIn[1]);
|
chris@162
|
201 float32x4_t x0 = vdupq_n_f32(bIn[0]);
|
chris@162
|
202
|
chris@162
|
203 float32x4_t a = vmulq_f32(o->coeff_xp3, x3);
|
chris@162
|
204 float32x4_t b = vmulq_f32(o->coeff_xp2, x2);
|
chris@162
|
205 float32x4_t c = vmulq_f32(o->coeff_xp1, x1);
|
chris@162
|
206 float32x4_t d = vmulq_f32(o->coeff_x0, x0);
|
chris@162
|
207 float32x4_t e = vmulq_f32(o->coeff_xm1, o->xm1);
|
chris@162
|
208 float32x4_t f = vmulq_f32(o->coeff_xm2, o->xm2);
|
chris@162
|
209 float32x4_t g = vmulq_f32(o->coeff_ym1, o->ym1);
|
chris@162
|
210 float32x4_t h = vmulq_f32(o->coeff_ym2, o->ym2);
|
chris@162
|
211 float32x4_t i = vaddq_f32(a, b);
|
chris@162
|
212 float32x4_t j = vaddq_f32(c, d);
|
chris@162
|
213 float32x4_t k = vaddq_f32(e, f);
|
chris@162
|
214 float32x4_t l = vaddq_f32(g, h);
|
chris@162
|
215 float32x4_t m = vaddq_f32(i, j);
|
chris@162
|
216 float32x4_t n = vaddq_f32(k, l);
|
chris@162
|
217 float32x4_t y = vaddq_f32(m, n);
|
chris@162
|
218
|
chris@162
|
219 o->xm1 = x3;
|
chris@162
|
220 o->xm2 = x2;
|
chris@162
|
221 o->ym1 = vdupq_n_f32(y[3]);
|
chris@162
|
222 o->ym2 = vdupq_n_f32(y[2]);
|
chris@162
|
223
|
chris@162
|
224 *bOut = y;
|
chris@162
|
225 #else // HV_SIMD_NONE
|
chris@162
|
226 float y = o->b0*bIn + o->b1*o->xm1 + o->b2*o->xm2 - o->a1*o->ym1 - o->a2*o->ym2;
|
chris@162
|
227 o->xm2 = o->xm1;
|
chris@162
|
228 o->xm1 = bIn;
|
chris@162
|
229 o->ym2 = o->ym1;
|
chris@162
|
230 o->ym1 = y;
|
chris@162
|
231 *bOut = y;
|
chris@162
|
232 #endif
|
chris@162
|
233 }
|
chris@162
|
234
|
chris@162
|
235 #endif // _HEAVY_SIGNAL_BIQUAD_H_
|