comparison core/FormatConvert.S @ 318:f7b19ea31bbb prerelease

Added NEON vectorised float<->int converters. Curiously, its performance is worse than the C version. I guess clang is better at vectorising than we thought! The new code remains disabled for now.
author andrewm
date Mon, 30 May 2016 01:06:01 +0100
parents
children
comparison
equal deleted inserted replaced
317:52733b58bdf3 318:f7b19ea31bbb
1 @
2 @ FormatConvert.S
3 @
4 @ NEON-based vector functions for converting data between int
5 @ and float with clipping and optional level scaling.
6 @ Written in assembly for performance reasons.
7 @
8 @ Bela: http://bela.io
9 @
10 @ (c) 2016 Andrew McPherson
11 @ Centre for Digital Music
12 @ Queen Mary University of London
13 @
14
15 .syntax unified
16 .arch armv7-a
17 .fpu neon
18
19 @ void int16_to_float_audio(int numSamples, int16_t *inBuffer, float *outBuffer);
20 @
21 @ Convert 16-bit signed ints to floats between -1.0 and 1.0. Used for audio input.
22 @
23 @ numSamples is rounded down to a multiple of 4 (with stereo audio, use an even number of frames); if the result is <= 0, nothing is converted.
24 @ inBuffer should be aligned on an 8-byte boundary.
25 @ outBuffer should be aligned on a 16-byte boundary.
26
27 @ Registers:
28 @ r0: numSamples How many samples remain to convert (decremented by 4 per loop iteration)
29 @ r1: inBuffer Buffer for input samples from ADC (post-incremented by each load)
30 @ r2: outBuffer Buffer to store output samples for render function (post-incremented by each store)
31
32 dIFAu_Input .dn D0.S16
33 qIFAu_Expanded .qn Q1.S32
34 qIFAu_Output .qn Q2.F32
35 dIFAu_Output_0 .dn D4.F32
36 dIFAu_Output_1 .dn D5.F32
37
38 .align 2
39 .global int16_to_float_audio
40 .thumb
41 .thumb_func
42 .type int16_to_float_audio, %function
43 int16_to_float_audio:
44 lsr r0, r0, #2
45 lsl r0, r0, #2 @ Clear the low two bits of numSamples (only multiples of 4 are processed)
46 cmp r0, #0 @ Check for trivial case: zero or negative sample count
47 it le
48 bxle lr @ Return without touching the buffers if numSamples <= 0
49
50 @ ---- loop: convert four samples per iteration; r0 > 0 on entry ----
51 int16_to_float_audio_loop:
52 vld1 {dIFAu_Input}, [r1]! @ Load four 16-bit signed ints from inBuffer++
53 vmovl qIFAu_Expanded, dIFAu_Input @ Sign-extend four 16-bit signed ints to 32-bit signed ints
54 vcvt qIFAu_Output, qIFAu_Expanded, #15 @ Convert four Q17.15 fixed points to floats (equiv. to / 32768)
55 vst1 {dIFAu_Output_0, dIFAu_Output_1}, [r2]! @ Store four floats to outBuffer++
56
57 subs r0, r0, #4 @ numSamples -= 4
58 it gt
59 bgt int16_to_float_audio_loop @ Loop if numSamples > 0
60
61 bx lr
62
63
64
65 @ void int16_to_float_analog(int numSamples, uint16_t *inBuffer, float *outBuffer);
66 @
67 @ Convert 16-bit unsigned ints to floats between 0.0 and 1.0. Used for analog input.
68 @
69 @ numSamples is rounded down to a multiple of 4 (i.e. integer numFrames at 4 channels); if the result is <= 0, nothing is converted.
70 @ inBuffer should be aligned on an 8-byte boundary.
71 @ outBuffer should be aligned on a 16-byte boundary.
72
73 @ Registers:
74 @ r0: numSamples How many samples remain to convert (decremented by 4 per loop iteration)
75 @ r1: inBuffer Buffer for input samples from ADC (post-incremented by each load)
76 @ r2: outBuffer Buffer to store output samples for render function (post-incremented by each store)
77
78 dIFAn_Input .dn D0.U16
79 qIFAn_Expanded .qn Q1.U32
80 qIFAn_Output .qn Q2.F32
81 dIFAn_Output_0 .dn D4.F32
82 dIFAn_Output_1 .dn D5.F32
83
84 .align 2
85 .global int16_to_float_analog
86 .thumb
87 .thumb_func
88 .type int16_to_float_analog, %function
89 int16_to_float_analog:
90 lsr r0, r0, #2
91 lsl r0, r0, #2 @ Clear the low two bits of numSamples (only multiples of 4 are processed)
92 cmp r0, #0 @ Check for trivial case: zero or negative sample count
93 it le
94 bxle lr @ Return without touching the buffers if numSamples <= 0
95
96 @ ---- loop: convert four samples per iteration; r0 > 0 on entry ----
97 int16_to_float_analog_loop:
98 vld1 {dIFAn_Input}, [r1]! @ Load four 16-bit unsigned ints from inBuffer++
99 vmovl qIFAn_Expanded, dIFAn_Input @ Zero-extend four 16-bit unsigned ints to 32-bit unsigned ints
100 vcvt qIFAn_Output, qIFAn_Expanded, #16 @ Convert four Q16.16 fixed points to floats (equiv. to / 65536)
101 vst1 {dIFAn_Output_0, dIFAn_Output_1}, [r2]! @ Store four floats to outBuffer++
102
103 subs r0, r0, #4 @ numSamples -= 4
104 it gt
105 bgt int16_to_float_analog_loop @ Loop if numSamples > 0
106
107 bx lr
108
109
110 @ void float_to_int16_audio(int numSamples, float *inBuffer, int16_t *outBuffer);
111 @
112 @ Convert floats between -1.0 and 1.0 to 16-bit signed ints, with saturation.
113 @ Used for audio output.
114 @
115 @ numSamples is rounded down to a multiple of 4 (with stereo audio, use an even number of frames); if the result is <= 0, nothing is converted.
116 @ inBuffer should be aligned on a 16-byte boundary.
117 @ outBuffer should be aligned on an 8-byte boundary.
118
119 @ Registers:
120 @ r0: numSamples How many samples remain to convert (decremented by 4 per loop iteration)
121 @ r1: inBuffer Buffer for input samples from render function (post-incremented by each load)
122 @ r2: outBuffer Buffer to store output samples for DAC (post-incremented by each store)
123
124 qFIAu_Input .qn Q0.F32
125 dFIAu_Input_0 .dn D0.F32
126 dFIAu_Input_1 .dn D1.F32
127 qFIAu_Converted .qn Q1.S32
128 dFIAu_Narrowed .dn D4.S16
129
130 .align 2
131 .global float_to_int16_audio
132 .thumb
133 .thumb_func
134 .type float_to_int16_audio, %function
135
136 float_to_int16_audio:
137 lsr r0, r0, #2
138 lsl r0, r0, #2 @ Clear the low two bits of numSamples (only multiples of 4 are processed)
139 cmp r0, #0 @ Check for trivial case: zero or negative sample count
140 it le
141 bxle lr @ Return without touching the buffers if numSamples <= 0
142
143 @ ---- loop: convert four samples per iteration; r0 > 0 on entry ----
144 float_to_int16_audio_loop:
145 vld1 {dFIAu_Input_0, dFIAu_Input_1}, [r1]! @ Load four floats from inBuffer++ (D0:D1 together form Q0)
146 vcvt qFIAu_Converted, qFIAu_Input, #15 @ Convert four floats into four Q17.15 fixed points (equiv. to * 32768)
147 @ The conversion rounds toward zero and saturates to the signed 32-bit range
148 vqmovn dFIAu_Narrowed, qFIAu_Converted @ Narrow four 32-bit signed ints to 16-bit signed ints, with saturation (e.g. +1.0 -> 32767)
149 vst1 {dFIAu_Narrowed}, [r2]! @ Store four ints to outBuffer++
150
151 subs r0, r0, #4 @ numSamples -= 4
152 it gt
153 bgt float_to_int16_audio_loop @ Loop if numSamples > 0
154
155 bx lr
156
157
158 @ void float_to_int16_analog(int numSamples, float *inBuffer, uint16_t *outBuffer);
159 @
160 @ Convert floats between 0.0 and 1.0 to 16-bit unsigned ints, with saturation.
161 @ Used for analog output.
162 @
163 @ numSamples is rounded down to a multiple of 4 (i.e. integer numFrames at 4 channels); if the result is <= 0, nothing is converted.
164 @ inBuffer should be aligned on a 16-byte boundary.
165 @ outBuffer should be aligned on an 8-byte boundary.
166
167 @ Registers:
168 @ r0: numSamples How many samples remain to convert (decremented by 4 per loop iteration)
169 @ r1: inBuffer Buffer for input samples from render function (post-incremented by each load)
170 @ r2: outBuffer Buffer to store output samples for DAC (post-incremented by each store)
171
172 qFIAn_Input .qn Q0.F32
173 dFIAn_Input_0 .dn D0.F32
174 dFIAn_Input_1 .dn D1.F32
175 qFIAn_Converted .qn Q1.U32
176 dFIAn_Narrowed .dn D4.U16
177
178 .align 2
179 .global float_to_int16_analog
180 .thumb
181 .thumb_func
182 .type float_to_int16_analog, %function
183
184 float_to_int16_analog:
185 lsr r0, r0, #2
186 lsl r0, r0, #2 @ Clear the low two bits of numSamples (only multiples of 4 are processed)
187 cmp r0, #0 @ Check for trivial case: zero or negative sample count
188 it le
189 bxle lr @ Return without touching the buffers if numSamples <= 0
190
191 @ ---- loop: convert four samples per iteration; r0 > 0 on entry ----
192 float_to_int16_analog_loop:
193 vld1 {dFIAn_Input_0, dFIAn_Input_1}, [r1]! @ Load four floats from inBuffer++ (D0:D1 together form Q0)
194 vcvt qFIAn_Converted, qFIAn_Input, #16 @ Convert four floats into four Q16.16 fixed points (equiv. to * 65536)
195 @ The conversion rounds toward zero and saturates to the unsigned 32-bit range
196 vqmovn dFIAn_Narrowed, qFIAn_Converted @ Narrow four 32-bit unsigned ints to 16-bit unsigned ints, with saturation (e.g. 1.0 -> 65535)
197 vst1 {dFIAn_Narrowed}, [r2]! @ Store four ints to outBuffer++
198
199 subs r0, r0, #4 @ numSamples -= 4
200 it gt
201 bgt float_to_int16_analog_loop @ Loop if numSamples > 0
202
203 bx lr
204