chris@160
|
1 /**
|
chris@160
|
2 * Copyright (c) 2014, 2015, Enzien Audio Ltd.
|
chris@160
|
3 *
|
chris@160
|
4 * Permission to use, copy, modify, and/or distribute this software for any
|
chris@160
|
5 * purpose with or without fee is hereby granted, provided that the above
|
chris@160
|
6 * copyright notice and this permission notice appear in all copies.
|
chris@160
|
7 *
|
chris@160
|
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
chris@160
|
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
chris@160
|
10 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
chris@160
|
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
chris@160
|
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
chris@160
|
13 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
chris@160
|
14 * PERFORMANCE OF THIS SOFTWARE.
|
chris@160
|
15 */
|
chris@160
|
16
|
chris@160
|
17 #ifndef _SIGNAL_SAMPHOLD_H_
|
chris@160
|
18 #define _SIGNAL_SAMPHOLD_H_
|
chris@160
|
19
|
chris@160
|
20 #include "HvBase.h"
|
chris@160
|
21
|
chris@160
|
22 typedef struct SignalSamphold {
|
chris@160
|
23 hv_bufferf_t s;
|
chris@160
|
24 } SignalSamphold;
|
chris@160
|
25
|
chris@160
|
26 hv_size_t sSamphold_init(SignalSamphold *o);
|
chris@160
|
27
|
chris@160
|
28 static inline void __hv_samphold_f(SignalSamphold *o, hv_bInf_t bIn0, hv_bInf_t bIn1, hv_bOutf_t bOut) {
|
chris@160
|
29 #if HV_SIMD_AVX
|
chris@160
|
30 #warning __hv_samphold_f() not implemented
|
chris@160
|
31 #elif HV_SIMD_SSE
|
chris@160
|
32 switch (_mm_movemask_ps(bIn1)) {
|
chris@160
|
33 default:
|
chris@160
|
34 case 0x0: *bOut = o->s; break;
|
chris@160
|
35 case 0x1: {
|
chris@160
|
36 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(0,0,0,0));
|
chris@160
|
37 o->s = *bOut;
|
chris@160
|
38 break;
|
chris@160
|
39 }
|
chris@160
|
40 case 0x2: {
|
chris@160
|
41 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(1,1,1,1));
|
chris@160
|
42 *bOut = _mm_blend_ps(o->s, x, 0xE);
|
chris@160
|
43 o->s = x;
|
chris@160
|
44 break;
|
chris@160
|
45 }
|
chris@160
|
46 case 0x3: {
|
chris@160
|
47 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(1,1,1,1));
|
chris@160
|
48 *bOut = _mm_blend_ps(bIn0, x, 0xC);
|
chris@160
|
49 o->s = x;
|
chris@160
|
50 break;
|
chris@160
|
51 }
|
chris@160
|
52 case 0x4: {
|
chris@160
|
53 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
|
chris@160
|
54 *bOut = _mm_blend_ps(o->s, x, 0xC);
|
chris@160
|
55 o->s = x;
|
chris@160
|
56 break;
|
chris@160
|
57 }
|
chris@160
|
58 case 0x5: {
|
chris@160
|
59 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,0,0));
|
chris@160
|
60 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
|
chris@160
|
61 break;
|
chris@160
|
62 }
|
chris@160
|
63 case 0x6: {
|
chris@160
|
64 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,1,0));
|
chris@160
|
65 *bOut = _mm_blend_ps(o->s, x, 0xE);
|
chris@160
|
66 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
|
chris@160
|
67 break;
|
chris@160
|
68 }
|
chris@160
|
69 case 0x7: {
|
chris@160
|
70 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
|
chris@160
|
71 *bOut = _mm_blend_ps(bIn0, x, 0x8);
|
chris@160
|
72 o->s = x;
|
chris@160
|
73 break;
|
chris@160
|
74 }
|
chris@160
|
75 case 0x8: {
|
chris@160
|
76 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
77 *bOut = _mm_blend_ps(o->s, x, 0x8);
|
chris@160
|
78 o->s = x;
|
chris@160
|
79 break;
|
chris@160
|
80 }
|
chris@160
|
81 case 0x9: {
|
chris@160
|
82 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,0,0,0));
|
chris@160
|
83 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
84 break;
|
chris@160
|
85 }
|
chris@160
|
86 case 0xA: {
|
chris@160
|
87 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,1,1,0));
|
chris@160
|
88 *bOut = _mm_blend_ps(o->s, x, 0xE);
|
chris@160
|
89 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
90 break;
|
chris@160
|
91 }
|
chris@160
|
92 case 0xB: {
|
chris@160
|
93 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,1,1,0));
|
chris@160
|
94 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
95 break;
|
chris@160
|
96 }
|
chris@160
|
97 case 0xC: {
|
chris@160
|
98 *bOut = _mm_blend_ps(o->s, bIn0, 0xC);
|
chris@160
|
99 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
100 break;
|
chris@160
|
101 }
|
chris@160
|
102 case 0xD: {
|
chris@160
|
103 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,2,0,0));
|
chris@160
|
104 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
105 break;
|
chris@160
|
106 }
|
chris@160
|
107 case 0xE: {
|
chris@160
|
108 *bOut = _mm_blend_ps(o->s, bIn0, 0xE);
|
chris@160
|
109 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
110 break;
|
chris@160
|
111 }
|
chris@160
|
112 case 0xF: {
|
chris@160
|
113 *bOut = bIn0;
|
chris@160
|
114 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
|
chris@160
|
115 break;
|
chris@160
|
116 }
|
chris@160
|
117 }
|
chris@160
|
118 #elif HV_SIMD_NEON
|
chris@160
|
119 uint32x4_t mmA = vandq_u32(
|
chris@160
|
120 vreinterpretq_u32_f32(bIn1), (uint32x4_t) {0x1, 0x2, 0x4, 0x8}); // [0 1 2 3]
|
chris@160
|
121 uint32x4_t mmB = vextq_u32(mmA, mmA, 2); // [2 3 0 1]
|
chris@160
|
122 uint32x4_t mmC = vorrq_u32(mmA, mmB); // [0+2 1+3 0+2 1+3]
|
chris@160
|
123 uint32x4_t mmD = vextq_u32(mmC, mmC, 3); // [1+3 0+2 1+3 0+2]
|
chris@160
|
124 uint32x4_t mmE = vorrq_u32(mmC, mmD); // [0+1+2+3 ...]
|
chris@160
|
125 uint32_t movemask = vgetq_lane_u32(mmE, 0);
|
chris@160
|
126 switch (movemask) {
|
chris@160
|
127 default:
|
chris@160
|
128 case 0x0: *bOut = o->s; break;
|
chris@160
|
129 case 0x1: {
|
chris@160
|
130 *bOut = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
|
chris@160
|
131 o->s = *bOut;
|
chris@160
|
132 break;
|
chris@160
|
133 }
|
chris@160
|
134 case 0x2: {
|
chris@160
|
135 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
|
chris@160
|
136 *bOut = vextq_f32(o->s, x, 3);
|
chris@160
|
137 o->s = x;
|
chris@160
|
138 break;
|
chris@160
|
139 }
|
chris@160
|
140 case 0x3: {
|
chris@160
|
141 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
|
chris@160
|
142 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
143 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
|
chris@160
|
144 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, ~0x0, ~0x0})));
|
chris@160
|
145 o->s = x;
|
chris@160
|
146 break;
|
chris@160
|
147 }
|
chris@160
|
148 case 0x4: {
|
chris@160
|
149 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,2));
|
chris@160
|
150 *bOut = vextq_f32(o->s, x, 2);
|
chris@160
|
151 o->s = x;
|
chris@160
|
152 break;
|
chris@160
|
153 }
|
chris@160
|
154 case 0x5: {
|
chris@160
|
155 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
|
chris@160
|
156 const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,2));
|
chris@160
|
157 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
158 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}),
|
chris@160
|
159 vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0})));
|
chris@160
|
160 o->s = y;
|
chris@160
|
161 }
|
chris@160
|
162 case 0x6: {
|
chris@160
|
163 const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
164 float32x4_t z = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
165 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
|
chris@160
|
166 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, ~0x0, ~0x0, 0x0})));
|
chris@160
|
167 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
168 vandq_u32(vreinterpretq_u32_f32(z), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}),
|
chris@160
|
169 vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0})));
|
chris@160
|
170 o->s = y;
|
chris@160
|
171 }
|
chris@160
|
172 case 0x7: {
|
chris@160
|
173 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,2));
|
chris@160
|
174 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
175 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}),
|
chris@160
|
176 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0})));
|
chris@160
|
177 o->s = x;
|
chris@160
|
178 break;
|
chris@160
|
179 }
|
chris@160
|
180 case 0x8: {
|
chris@160
|
181 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
182 *bOut = vextq_f32(o->s, x, 1);
|
chris@160
|
183 o->s = x;
|
chris@160
|
184 break;
|
chris@160
|
185 }
|
chris@160
|
186 case 0x9: {
|
chris@160
|
187 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
|
chris@160
|
188 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
189 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}),
|
chris@160
|
190 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0})));
|
chris@160
|
191 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
192 }
|
chris@160
|
193 case 0xA: {
|
chris@160
|
194 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
|
chris@160
|
195 const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
196 float32x4_t z = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
197 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
|
chris@160
|
198 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, ~0x0, 0x0})));
|
chris@160
|
199 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
200 vandq_u32(vreinterpretq_u32_f32(z), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}),
|
chris@160
|
201 vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0})));
|
chris@160
|
202 o->s = y;
|
chris@160
|
203 }
|
chris@160
|
204 case 0xB: {
|
chris@160
|
205 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
|
chris@160
|
206 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
207 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, ~0x0, 0x0, ~0x0}),
|
chris@160
|
208 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, 0x0, ~0x0, 0x0})));
|
chris@160
|
209 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
210 break;
|
chris@160
|
211 }
|
chris@160
|
212 case 0xC: {
|
chris@160
|
213 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
214 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}),
|
chris@160
|
215 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0})));
|
chris@160
|
216 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
217 break;
|
chris@160
|
218 }
|
chris@160
|
219 case 0xD: {
|
chris@160
|
220 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
|
chris@160
|
221 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
222 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, 0x0, ~0x0, ~0x0}),
|
chris@160
|
223 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, 0x0, 0x0})));
|
chris@160
|
224 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
225 }
|
chris@160
|
226 case 0xE: {
|
chris@160
|
227 *bOut = vreinterpretq_f32_u32(vorrq_u32(
|
chris@160
|
228 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
|
chris@160
|
229 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, ~0x0, ~0x0, ~0x0})));
|
chris@160
|
230 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
231 break;
|
chris@160
|
232 }
|
chris@160
|
233 case 0xF: {
|
chris@160
|
234 *bOut = bIn0;
|
chris@160
|
235 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
|
chris@160
|
236 break;
|
chris@160
|
237 }
|
chris@160
|
238 }
|
chris@160
|
239 #else // HV_SIMD_NONE
|
chris@160
|
240 if (bIn1 != 0.0f) o->s = bIn0;
|
chris@160
|
241 *bOut = o->s;
|
chris@160
|
242 #endif
|
chris@160
|
243 }
|
chris@160
|
244
|
chris@160
|
245 void sSamphold_onMessage(HvBase *_c, SignalSamphold *o, int letIndex,
|
chris@160
|
246 const HvMessage *const m, void *sendMessage);
|
chris@160
|
247
|
chris@160
|
248 #endif // _SIGNAL_SAMPHOLD_H_
|