annotate projects/heavy/samphold/SignalSamphold.h @ 163:20b52283c7b4 heavy-updated

- added circular buffer pd/heavy example (works but process needs to be killed manually if launched via ssh?)
author chnrx <chris.heinrichs@gmail.com>
date Thu, 12 Nov 2015 15:55:30 +0000
parents 5bcf04234f80
children
rev   line source
chris@160 1 /**
chris@160 2 * Copyright (c) 2014, 2015, Enzien Audio Ltd.
chris@160 3 *
chris@160 4 * Permission to use, copy, modify, and/or distribute this software for any
chris@160 5 * purpose with or without fee is hereby granted, provided that the above
chris@160 6 * copyright notice and this permission notice appear in all copies.
chris@160 7 *
chris@160 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
chris@160 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
chris@160 10 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
chris@160 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
chris@160 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
chris@160 13 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
chris@160 14 * PERFORMANCE OF THIS SOFTWARE.
chris@160 15 */
chris@160 16
chris@160 17 #ifndef _SIGNAL_SAMPHOLD_H_
chris@160 18 #define _SIGNAL_SAMPHOLD_H_
chris@160 19
chris@160 20 #include "HvBase.h"
chris@160 21
chris@160 22 typedef struct SignalSamphold {
chris@160 23 hv_bufferf_t s;
chris@160 24 } SignalSamphold;
chris@160 25
/**
 * Declaration only; the definition lives outside this header.
 * Presumably prepares `o` for use by __hv_samphold_f() (e.g. sets the held
 * value) — TODO confirm against the implementation.
 * NOTE(review): the meaning of the returned hv_size_t is not visible here.
 */
chris@160 26 hv_size_t sSamphold_init(SignalSamphold *o);
chris@160 27
chris@160 28 static inline void __hv_samphold_f(SignalSamphold *o, hv_bInf_t bIn0, hv_bInf_t bIn1, hv_bOutf_t bOut) {
chris@160 29 #if HV_SIMD_AVX
chris@160 30 #warning __hv_samphold_f() not implemented
chris@160 31 #elif HV_SIMD_SSE
chris@160 32 switch (_mm_movemask_ps(bIn1)) {
chris@160 33 default:
chris@160 34 case 0x0: *bOut = o->s; break;
chris@160 35 case 0x1: {
chris@160 36 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(0,0,0,0));
chris@160 37 o->s = *bOut;
chris@160 38 break;
chris@160 39 }
chris@160 40 case 0x2: {
chris@160 41 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(1,1,1,1));
chris@160 42 *bOut = _mm_blend_ps(o->s, x, 0xE);
chris@160 43 o->s = x;
chris@160 44 break;
chris@160 45 }
chris@160 46 case 0x3: {
chris@160 47 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(1,1,1,1));
chris@160 48 *bOut = _mm_blend_ps(bIn0, x, 0xC);
chris@160 49 o->s = x;
chris@160 50 break;
chris@160 51 }
chris@160 52 case 0x4: {
chris@160 53 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
chris@160 54 *bOut = _mm_blend_ps(o->s, x, 0xC);
chris@160 55 o->s = x;
chris@160 56 break;
chris@160 57 }
chris@160 58 case 0x5: {
chris@160 59 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,0,0));
chris@160 60 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
chris@160 61 break;
chris@160 62 }
chris@160 63 case 0x6: {
chris@160 64 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,1,0));
chris@160 65 *bOut = _mm_blend_ps(o->s, x, 0xE);
chris@160 66 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
chris@160 67 break;
chris@160 68 }
chris@160 69 case 0x7: {
chris@160 70 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2));
chris@160 71 *bOut = _mm_blend_ps(bIn0, x, 0x8);
chris@160 72 o->s = x;
chris@160 73 break;
chris@160 74 }
chris@160 75 case 0x8: {
chris@160 76 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 77 *bOut = _mm_blend_ps(o->s, x, 0x8);
chris@160 78 o->s = x;
chris@160 79 break;
chris@160 80 }
chris@160 81 case 0x9: {
chris@160 82 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,0,0,0));
chris@160 83 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 84 break;
chris@160 85 }
chris@160 86 case 0xA: {
chris@160 87 const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,1,1,0));
chris@160 88 *bOut = _mm_blend_ps(o->s, x, 0xE);
chris@160 89 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 90 break;
chris@160 91 }
chris@160 92 case 0xB: {
chris@160 93 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,1,1,0));
chris@160 94 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 95 break;
chris@160 96 }
chris@160 97 case 0xC: {
chris@160 98 *bOut = _mm_blend_ps(o->s, bIn0, 0xC);
chris@160 99 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 100 break;
chris@160 101 }
chris@160 102 case 0xD: {
chris@160 103 *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,2,0,0));
chris@160 104 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 105 break;
chris@160 106 }
chris@160 107 case 0xE: {
chris@160 108 *bOut = _mm_blend_ps(o->s, bIn0, 0xE);
chris@160 109 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 110 break;
chris@160 111 }
chris@160 112 case 0xF: {
chris@160 113 *bOut = bIn0;
chris@160 114 o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3));
chris@160 115 break;
chris@160 116 }
chris@160 117 }
chris@160 118 #elif HV_SIMD_NEON
chris@160 119 uint32x4_t mmA = vandq_u32(
chris@160 120 vreinterpretq_u32_f32(bIn1), (uint32x4_t) {0x1, 0x2, 0x4, 0x8}); // [0 1 2 3]
chris@160 121 uint32x4_t mmB = vextq_u32(mmA, mmA, 2); // [2 3 0 1]
chris@160 122 uint32x4_t mmC = vorrq_u32(mmA, mmB); // [0+2 1+3 0+2 1+3]
chris@160 123 uint32x4_t mmD = vextq_u32(mmC, mmC, 3); // [1+3 0+2 1+3 0+2]
chris@160 124 uint32x4_t mmE = vorrq_u32(mmC, mmD); // [0+1+2+3 ...]
chris@160 125 uint32_t movemask = vgetq_lane_u32(mmE, 0);
chris@160 126 switch (movemask) {
chris@160 127 default:
chris@160 128 case 0x0: *bOut = o->s; break;
chris@160 129 case 0x1: {
chris@160 130 *bOut = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
chris@160 131 o->s = *bOut;
chris@160 132 break;
chris@160 133 }
chris@160 134 case 0x2: {
chris@160 135 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
chris@160 136 *bOut = vextq_f32(o->s, x, 3);
chris@160 137 o->s = x;
chris@160 138 break;
chris@160 139 }
chris@160 140 case 0x3: {
chris@160 141 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
chris@160 142 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 143 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
chris@160 144 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, ~0x0, ~0x0})));
chris@160 145 o->s = x;
chris@160 146 break;
chris@160 147 }
chris@160 148 case 0x4: {
chris@160 149 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,2));
chris@160 150 *bOut = vextq_f32(o->s, x, 2);
chris@160 151 o->s = x;
chris@160 152 break;
chris@160 153 }
chris@160 154 case 0x5: {
chris@160 155 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
chris@160 156 const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,2));
chris@160 157 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 158 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}),
chris@160 159 vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0})));
chris@160 160 o->s = y;
chris@160 161 }
chris@160 162 case 0x6: {
chris@160 163 const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 164 float32x4_t z = vreinterpretq_f32_u32(vorrq_u32(
chris@160 165 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
chris@160 166 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, ~0x0, ~0x0, 0x0})));
chris@160 167 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 168 vandq_u32(vreinterpretq_u32_f32(z), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}),
chris@160 169 vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0})));
chris@160 170 o->s = y;
chris@160 171 }
chris@160 172 case 0x7: {
chris@160 173 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,2));
chris@160 174 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 175 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}),
chris@160 176 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0})));
chris@160 177 o->s = x;
chris@160 178 break;
chris@160 179 }
chris@160 180 case 0x8: {
chris@160 181 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 182 *bOut = vextq_f32(o->s, x, 1);
chris@160 183 o->s = x;
chris@160 184 break;
chris@160 185 }
chris@160 186 case 0x9: {
chris@160 187 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
chris@160 188 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 189 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}),
chris@160 190 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0})));
chris@160 191 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 192 }
chris@160 193 case 0xA: {
chris@160 194 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
chris@160 195 const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 196 float32x4_t z = vreinterpretq_f32_u32(vorrq_u32(
chris@160 197 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
chris@160 198 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, ~0x0, 0x0})));
chris@160 199 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 200 vandq_u32(vreinterpretq_u32_f32(z), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}),
chris@160 201 vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0})));
chris@160 202 o->s = y;
chris@160 203 }
chris@160 204 case 0xB: {
chris@160 205 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1));
chris@160 206 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 207 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, ~0x0, 0x0, ~0x0}),
chris@160 208 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, 0x0, ~0x0, 0x0})));
chris@160 209 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 210 break;
chris@160 211 }
chris@160 212 case 0xC: {
chris@160 213 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 214 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}),
chris@160 215 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0})));
chris@160 216 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 217 break;
chris@160 218 }
chris@160 219 case 0xD: {
chris@160 220 const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0));
chris@160 221 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 222 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, 0x0, ~0x0, ~0x0}),
chris@160 223 vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, 0x0, 0x0})));
chris@160 224 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 225 }
chris@160 226 case 0xE: {
chris@160 227 *bOut = vreinterpretq_f32_u32(vorrq_u32(
chris@160 228 vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}),
chris@160 229 vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, ~0x0, ~0x0, ~0x0})));
chris@160 230 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 231 break;
chris@160 232 }
chris@160 233 case 0xF: {
chris@160 234 *bOut = bIn0;
chris@160 235 o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3));
chris@160 236 break;
chris@160 237 }
chris@160 238 }
chris@160 239 #else // HV_SIMD_NONE
chris@160 240 if (bIn1 != 0.0f) o->s = bIn0;
chris@160 241 *bOut = o->s;
chris@160 242 #endif
chris@160 243 }
chris@160 244
/**
 * Declaration only; the definition lives outside this header.
 * Presumably the control-message handler for a SignalSamphold, with
 * `letIndex` selecting the inlet the message arrived on and `sendMessage`
 * being a callback passed as an untyped pointer — TODO confirm against the
 * implementation; none of this is established by this header.
 */
chris@160 245 void sSamphold_onMessage(HvBase *_c, SignalSamphold *o, int letIndex,
chris@160 246 const HvMessage *const m, void *sendMessage);
chris@160 247
chris@160 248 #endif // _SIGNAL_SAMPHOLD_H_