beaglert: projects/d-box/audio

annotate projects/d-box/audio_routines.S @ 0:8a575ba3ab52

Initial commit.

author	andrewm
date	Fri, 31 Oct 2014 19:10:17 +0100
parents
children

rev	line source
andrewm@0	1 @
andrewm@0	2 @ audio_routines.S
andrewm@0	3 @
andrewm@0	4 @ NEON-based functions for time-critical audio processing
andrewm@0	5 @
andrewm@0	6 @ Andrew McPherson 2014
andrewm@0	7 @ Queen Mary University of London
andrewm@0	8 @
andrewm@0	9
andrewm@0	10 .syntax unified
andrewm@0	11 .arch armv7-a
andrewm@0	12 .fpu neon
andrewm@0	13
andrewm@0	14 @ void oscillator_bank_neon(int numAudioFrames, float *audioOut,
andrewm@0	15 @ int activePartialNum, int lookupTableSize,
andrewm@0	16 @ float *phases, float *frequencies, float *amplitudes,
andrewm@0	17 @ float *freqDerivatives, float *ampDerivatives,
andrewm@0	18 @ float *lookupTable);
andrewm@0	19
andrewm@0	20 @ Registers:
andrewm@0	21 @ r0: numAudioFrames How many frames to render
andrewm@0	22 @ r1: audioOut Buffer for audio output samples [stereo]
andrewm@0	23 @ r2: activePartialNum How many active partials to render
andrewm@0	24 @ r3: lookupTableSize Size of lookup table
andrewm@0	25 @ ---- other arguments start on the stack and are moved: -----
andrewm@0	26 @ r4: phases Phase of each oscillator (pointer)
andrewm@0	27 @ r5: frequencies Normalised frequency of each oscillator (pointer)
andrewm@0	28 @ r6: amplitudes Normalised amplitude of each oscillator (pointer)
andrewm@0	29 @ r7: freqDerivatives Derivative of frequency for each oscillator (pointer)
andrewm@0	30 @ r8: ampDerivatives Derivative of amplitude for each oscillator (pointer)
andrewm@0	31 @ r9: lookupTable Lookup table containing one oscillation
andrewm@0	32 @
andrewm@0	33 @ Alignment requirements:
andrewm@0	34 @ audioOut: 8-byte boundary
andrewm@0	35 @ phases: 16-byte boundary
andrewm@0	36 @ frequencies: 16-byte boundary
andrewm@0	37 @ amplitudes: 16-byte boundary
andrewm@0	38 @ freqDerivatives: 16-byte boundary
andrewm@0	39 @ ampDerivatives: 16-byte boundary
andrewm@0	40 @ lookupTable: 4-byte boundary (TODO: check this)
andrewm@0	41
andrewm@0	42 .align 2
andrewm@0	43 .global oscillator_bank_neon
andrewm@0	44 .thumb
andrewm@0	45 .thumb_func
andrewm@0	46 .type oscillator_bank_neon, %function
andrewm@0	47 oscillator_bank_neon:
andrewm@0	48 @ Adds the output of the active oscillators, four at a time, into both channels of audioOut.
andrewm@0	49 @ Register aliases for the NEON state of the four oscillators currently being rendered:
andrewm@0	50 dSample .dn D6.F32 @ One stereo frame (2 floats) read from / written to audioOut
andrewm@0	51 qPhases .qn Q8.F32 @ Phases x4 (D16/D17 below are its two halves)
andrewm@0	52 dPhases_0 .dn D16.F32
andrewm@0	53 dPhases_1 .dn D17.F32
andrewm@0	54 qFreqs .qn Q9.F32 @ Frequencies x4: added to the phases every frame
andrewm@0	55 dFreqs_0 .dn D18.F32
andrewm@0	56 dFreqs_1 .dn D19.F32
andrewm@0	57 qAmps .qn Q10.F32 @ Amplitudes x4: multiply the interpolated samples
andrewm@0	58 dAmps_0 .dn D20.F32
andrewm@0	59 dAmps_1 .dn D21.F32
andrewm@0	60 qFreqDs .qn Q11.F32 @ Frequency derivatives x4: added to the frequencies every frame
andrewm@0	61 dFreqDs_0 .dn D22.F32
andrewm@0	62 dFreqDs_1 .dn D23.F32
andrewm@0	63 qAmpDs .qn Q12.F32 @ Amplitude derivatives x4: added to the amplitudes every frame
andrewm@0	64 dAmpDs_0 .dn D24.F32
andrewm@0	65 dAmpDs_1 .dn D25.F32
andrewm@0	66
andrewm@0	67 qBaseInts .qn Q13.U32 @ Base indexes: unsigned ints x4
andrewm@0	68 dBaseInts_0 .dn D26.U32
andrewm@0	69 dBaseInts_1 .dn D27.U32
andrewm@0	70 qFractions .qn Q14.F32 @ Fraction indexes: floats x4
andrewm@0	71 qTableBase .qn Q15.U32 @ Base of lookup table
andrewm@0	72
andrewm@0	73 cmp r0, #0 @ Check for trivial case 1: no frames to render (zero or negative count)
andrewm@0	74 it le
andrewm@0	75 bxle lr @ Return before touching the stack or any buffer (the sample loop would otherwise run once)
andrewm@0	76 cmp r2, #4 @ Check for trivial case 2: fewer than 4 active partials (signed compare)
andrewm@0	77 it lt
andrewm@0	78 bxlt lr @ Return if that's the case. NOTE(review): counts 1-3 render nothing, while e.g. 5 renders 8 - confirm intent
andrewm@0	79
andrewm@0	80 push {r4-r11} @ 8 registers = 32 bytes, so the stacked arguments now start 32 bytes above SP
andrewm@0	81 add r11, sp, #32 @ r11 = address of the first stacked argument
andrewm@0	82 ldm r11, {r4-r9} @ Load 6 arguments into registers
andrewm@0	83
andrewm@0	84 vdup qTableBase, r9 @ Copy the lookup table base address into all 4 lanes
andrewm@0	85
andrewm@0	86 @ Outer loop: iterate over the number of oscillators, choosing 4 at a
andrewm@0	87 @ time to work with.
andrewm@0	88 oscbank_oscillator_loop:
andrewm@0	89 vld1 {dPhases_0, dPhases_1}, [r4] @ no increment; will store at end of sample loop
andrewm@0	90 vld1 {dFreqs_0, dFreqs_1}, [r5] @ no increment; stored back (with increment) after the sample loop
andrewm@0	91 vld1 {dAmps_0, dAmps_1}, [r6] @ no increment; stored back (with increment) after the sample loop
andrewm@0	92 vld1 {dFreqDs_0, dFreqDs_1}, [r7]! @ increment; won't update at end of sample loop
andrewm@0	93 vld1 {dAmpDs_0, dAmpDs_1}, [r8]! @ increment; derivatives are read-only here
andrewm@0	94
andrewm@0	95 push {r0-r1,r4-r8} @ Save frame count, output pointer and array pointers: the sample loop modifies r0, r1 and r4-r7
andrewm@0	96 @ --- inner loop: iterate over the number of samples ---
andrewm@0	97 oscbank_sample_loop:
andrewm@0	98 vcvt qBaseInts, qPhases @ Convert phases to unsigned integer table indexes (integer part of each phase)
andrewm@0	99 vmov q2.f32, #1.0 @ Load 1.0 into every slot of q2
andrewm@0	100 vshl q0.U32, qBaseInts, #2 @ Shift the indexes left 2 (*4 for float addressing)
andrewm@0	101 vcvt qFractions, qBaseInts @ int back to float
andrewm@0	102 vadd q0.U32, q0.U32, qTableBase @ Find memory addresses
andrewm@0	103
andrewm@0	104 vmov r4, r5, d0 @ Move two table addresses to ARM registers
andrewm@0	105 vmov r6, r7, d1 @ Move two more table addresses to ARM registers
andrewm@0	106 vsub qFractions, qPhases, qFractions @ fraction = phase - integer part of phase
andrewm@0	107
andrewm@0	108 vldr.64 d0, [r4] @ Load two consecutive floats at each location
andrewm@0	109 vldr.64 d1, [r5] @ These hold the previous and following samples in the table
andrewm@0	110 vldr.64 d2, [r6] @ TODO: check whether these work at 4-byte alignment
andrewm@0	111 vldr.64 d3, [r7]
andrewm@0	112
andrewm@0	113 @ Format at this point:
andrewm@0	114 @ Osc0(before) Osc0(after) Osc1(before) Osc1(after) Osc2(before) Osc2(after) Osc3(before) Osc3(after)
andrewm@0	115 @ We want:
andrewm@0	116 @ Osc0(before) Osc1(before) Osc2(before) Osc3(before) Osc0(after) Osc1(after) Osc2(after) Osc3(after)
andrewm@0	117
andrewm@0	118 vuzp.32 q0, q1 @ Now q0 contains before, q1 contains after
andrewm@0	119 vsub q2.f32, q2.f32, qFractions @ q2 = 1.0 - fraction
andrewm@0	120 vmul q1.f32, q1.f32, qFractions @ q1 = fraction * after
andrewm@0	121 vmul q0.f32, q0.f32, q2.f32 @ q0 = (1.0 - fraction) * before
andrewm@0	122
andrewm@0	123 vadd qPhases, qPhases, qFreqs @ Update phases
andrewm@0	124 vadd qFreqs, qFreqs, qFreqDs @ Update frequencies
andrewm@0	125
andrewm@0	126 vadd q0.f32, q0.f32, q1.f32 @ Add two interpolated components to get the final sample
andrewm@0	127 vdup q2.u32, r3 @ Put lookup table size into each element of q2
andrewm@0	128 vcvt qBaseInts, qPhases @ Integer part of the new phases
andrewm@0	129 vmul q0.f32, q0.f32, qAmps @ Multiply samples by current amplitude
andrewm@0	130
andrewm@0	131 vld1 dSample, [r1] @ Load the current stereo samples
andrewm@0	132 vpadd d2.f32, d0.f32, d1.f32 @ Pairwise add q0 (4 output samples) into d2: Osc0+Osc1, Osc2+Osc3
andrewm@0	133
andrewm@0	134 vand q2, q2, qBaseInts @ tableSize AND new phase int: leaves the size bit set only where the phase reached the table size (assumes a power-of-two size - TODO confirm)
andrewm@0	135 vpadd d3.f32, d2.f32, d2.f32 @ Pairwise add d2 into d3 --> d3[0] and d3[1] both hold total of 4 oscillators
andrewm@0	136 vadd qAmps, qAmps, qAmpDs @ Update amplitudes
andrewm@0	137 vcvt q0.f32, q2.u32 @ Convert int back to float after AND operation (q0 is free: its samples were consumed by the vpadd above)
andrewm@0	138
andrewm@0	139 vadd dSample, dSample, d3.f32 @ Add oscillator outputs to each channel
andrewm@0	140
andrewm@0	141 subs r0, r0, #1 @ numFrames-- (sets the flags tested by the bgt below; the NEON ops in between leave them alone)
andrewm@0	142 vsub qPhases, qPhases, q0.f32 @ Keep phases in table range
andrewm@0	143 vst1 dSample, [r1]! @ Store back in buffer and increment by 8
andrewm@0	144
andrewm@0	145 it gt
andrewm@0	146 bgt oscbank_sample_loop @ Loop if numFrames > 0
andrewm@0	147
andrewm@0	148 @ --- end inner loop ---
andrewm@0	149 pop {r0-r1,r4-r8} @ Restore registers: restores audioOut and numFrames, among others
andrewm@0	150
andrewm@0	151 vst1 {dPhases_0, dPhases_1}, [r4]! @ Store phases back to array
andrewm@0	152 vst1 {dFreqs_0, dFreqs_1}, [r5]! @ Store frequencies back to array
andrewm@0	153 vst1 {dAmps_0, dAmps_1}, [r6]! @ Store amplitudes back to array
andrewm@0	154 @ No need to update r7, r8: the post-incrementing loads at the top of the loop already advanced them
andrewm@0	155
andrewm@0	156 subs r2, r2, #4 @ numPartials -= 4
andrewm@0	157 it gt
andrewm@0	158 bgt oscbank_oscillator_loop @ Loop if numPartials > 0
andrewm@0	159
andrewm@0	160 pop {r4-r11} @ Restore the registers saved on entry
andrewm@0	161 bx lr
andrewm@0	162
andrewm@0	163
andrewm@0	164 @ void wavetable_interpolate_neon(int numSamplesIn, int numSamplesOut,
andrewm@0	165 @ float *tableIn, float *tableOut);
andrewm@0	166
andrewm@0	167 @ Registers:
andrewm@0	168 @ r0: numSamplesIn Size of the input table
andrewm@0	169 @ r1: numSamplesOut Size of the output table
andrewm@0	170 @ r2: tableIn Pointer to input table
andrewm@0	171 @ r3: tableOut Pointer to output table
andrewm@0	172
andrewm@0	173 @ Alignment requirements:
andrewm@0	174 @ tableIn: 8-byte boundary
andrewm@0	175 @ tableOut: 8-byte boundary
andrewm@0	176
andrewm@0	177 .p2align 2 @ 4-byte alignment (2 low-order zero address bits)
andrewm@0	178 .thumb
andrewm@0	179 .thumb_func
andrewm@0	180 .globl wavetable_interpolate_neon
andrewm@0	181 .type wavetable_interpolate_neon, %function
andrewm@0	182 wavetable_interpolate_neon:
andrewm@0	183 @ TODO: not implemented yet - no argument is read and neither table is written
andrewm@0	184
andrewm@0	185 bx lr @ Return to the caller immediately

Mercurial > hg > beaglert

annotate projects/d-box/audio_routines.S @ 0:8a575ba3ab52