chris@160: /** chris@160: * Copyright (c) 2014, 2015, Enzien Audio Ltd. chris@160: * chris@160: * Permission to use, copy, modify, and/or distribute this software for any chris@160: * purpose with or without fee is hereby granted, provided that the above chris@160: * copyright notice and this permission notice appear in all copies. chris@160: * chris@160: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH chris@160: * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY chris@160: * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, chris@160: * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM chris@160: * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR chris@160: * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR chris@160: * PERFORMANCE OF THIS SOFTWARE. chris@160: */ chris@160: chris@160: #ifndef _SIGNAL_SAMPHOLD_H_ chris@160: #define _SIGNAL_SAMPHOLD_H_ chris@160: chris@160: #include "HvBase.h" chris@160: chris@160: typedef struct SignalSamphold { chris@160: hv_bufferf_t s; chris@160: } SignalSamphold; chris@160: chris@160: hv_size_t sSamphold_init(SignalSamphold *o); chris@160: chris@160: static inline void __hv_samphold_f(SignalSamphold *o, hv_bInf_t bIn0, hv_bInf_t bIn1, hv_bOutf_t bOut) { chris@160: #if HV_SIMD_AVX chris@160: #warning __hv_samphold_f() not implemented chris@160: #elif HV_SIMD_SSE chris@160: switch (_mm_movemask_ps(bIn1)) { chris@160: default: chris@160: case 0x0: *bOut = o->s; break; chris@160: case 0x1: { chris@160: *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(0,0,0,0)); chris@160: o->s = *bOut; chris@160: break; chris@160: } chris@160: case 0x2: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(1,1,1,1)); chris@160: *bOut = _mm_blend_ps(o->s, x, 0xE); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x3: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(1,1,1,1)); chris@160: *bOut = _mm_blend_ps(bIn0, x, 0xC); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x4: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2)); chris@160: *bOut = _mm_blend_ps(o->s, x, 0xC); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x5: { chris@160: *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,0,0)); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2)); chris@160: break; chris@160: } chris@160: case 0x6: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,1,0)); chris@160: *bOut = _mm_blend_ps(o->s, x, 0xE); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2)); chris@160: break; chris@160: } chris@160: case 0x7: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(2,2,2,2)); chris@160: *bOut = _mm_blend_ps(bIn0, x, 0x8); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x8: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: *bOut = _mm_blend_ps(o->s, x, 0x8); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x9: { chris@160: *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,0,0,0)); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: case 0xA: { chris@160: const __m128 x = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,1,1,0)); chris@160: *bOut = _mm_blend_ps(o->s, x, 0xE); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: case 0xB: { chris@160: *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,1,1,0)); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: case 0xC: { chris@160: *bOut = _mm_blend_ps(o->s, bIn0, 0xC); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: case 0xD: { chris@160: *bOut = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,2,0,0)); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: case 0xE: { chris@160: *bOut = _mm_blend_ps(o->s, bIn0, 0xE); chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: case 0xF: { chris@160: *bOut = bIn0; chris@160: o->s = _mm_shuffle_ps(bIn0, bIn0, _MM_SHUFFLE(3,3,3,3)); chris@160: break; chris@160: } chris@160: } chris@160: #elif HV_SIMD_NEON chris@160: uint32x4_t mmA = vandq_u32( chris@160: vreinterpretq_u32_f32(bIn1), (uint32x4_t) {0x1, 0x2, 0x4, 0x8}); // [0 1 2 3] chris@160: uint32x4_t mmB = vextq_u32(mmA, mmA, 2); // [2 3 0 1] chris@160: uint32x4_t mmC = vorrq_u32(mmA, mmB); // [0+2 1+3 0+2 1+3] chris@160: uint32x4_t mmD = vextq_u32(mmC, mmC, 3); // [1+3 0+2 1+3 0+2] chris@160: uint32x4_t mmE = vorrq_u32(mmC, mmD); // [0+1+2+3 ...] chris@160: uint32_t movemask = vgetq_lane_u32(mmE, 0); chris@160: switch (movemask) { chris@160: default: chris@160: case 0x0: *bOut = o->s; break; chris@160: case 0x1: { chris@160: *bOut = vdupq_n_f32(vgetq_lane_f32(bIn0,0)); chris@160: o->s = *bOut; chris@160: break; chris@160: } chris@160: case 0x2: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1)); chris@160: *bOut = vextq_f32(o->s, x, 3); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x3: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1)); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, ~0x0, ~0x0}))); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x4: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,2)); chris@160: *bOut = vextq_f32(o->s, x, 2); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x5: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0)); chris@160: const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,2)); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0}))); chris@160: o->s = y; chris@160: } chris@160: case 0x6: { chris@160: const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: float32x4_t z = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, ~0x0, ~0x0, 0x0}))); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(z), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0}))); chris@160: o->s = y; chris@160: } chris@160: case 0x7: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,2)); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0}))); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x8: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: *bOut = vextq_f32(o->s, x, 1); chris@160: o->s = x; chris@160: break; chris@160: } chris@160: case 0x9: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0)); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0}))); chris@160: o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: } chris@160: case 0xA: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1)); chris@160: const float32x4_t y = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: float32x4_t z = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, ~0x0, 0x0}))); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(z), (uint32x4_t) {~0x0, ~0x0, ~0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(y), (uint32x4_t) {0x0, 0x0, 0x0, ~0x0}))); chris@160: o->s = y; chris@160: } chris@160: case 0xB: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,1)); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, ~0x0, 0x0, ~0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, 0x0, ~0x0, 0x0}))); chris@160: o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: break; chris@160: } chris@160: case 0xC: { chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, ~0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, 0x0, ~0x0, ~0x0}))); chris@160: o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: break; chris@160: } chris@160: case 0xD: { chris@160: const float32x4_t x = vdupq_n_f32(vgetq_lane_f32(bIn0,0)); chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {~0x0, 0x0, ~0x0, ~0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(x), (uint32x4_t) {0x0, ~0x0, 0x0, 0x0}))); chris@160: o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: } chris@160: case 0xE: { chris@160: *bOut = vreinterpretq_f32_u32(vorrq_u32( chris@160: vandq_u32(vreinterpretq_u32_f32(o->s), (uint32x4_t) {~0x0, 0x0, 0x0, 0x0}), chris@160: vandq_u32(vreinterpretq_u32_f32(bIn0), (uint32x4_t) {0x0, ~0x0, ~0x0, ~0x0}))); chris@160: o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: break; chris@160: } chris@160: case 0xF: { chris@160: *bOut = bIn0; chris@160: o->s = vdupq_n_f32(vgetq_lane_f32(bIn0,3)); chris@160: break; chris@160: } chris@160: } chris@160: #else // HV_SIMD_NONE chris@160: if (bIn1 != 0.0f) o->s = bIn0; chris@160: *bOut = o->s; chris@160: #endif chris@160: } chris@160: chris@160: void sSamphold_onMessage(HvBase *_c, SignalSamphold *o, int letIndex, chris@160: const HvMessage *const m, void *sendMessage); chris@160: chris@160: #endif // _SIGNAL_SAMPHOLD_H_