Mercurial > hg > beaglert
comparison core/FormatConvert.S @ 318:f7b19ea31bbb prerelease
Added NEON vectorised float<->int converters. Curiously, its performance is worse than the C version. I guess clang is better at vectorising than we thought! The new code remains disabled for now.
author | andrewm |
---|---|
date | Mon, 30 May 2016 01:06:01 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
317:52733b58bdf3 | 318:f7b19ea31bbb |
---|---|
1 @ | |
2 @ FormatConvert.S | |
3 @ | |
4 @ NEON-based vector functions for converting data between int | |
5 @ and float with clipping and optional level scaling. | |
6 @ Written in assembly for performance reasons. | |
7 @ | |
8 @ Bela: http://bela.io | |
9 @ | |
10 @ (c) 2016 Andrew McPherson | |
11 @ Centre for Digital Music | |
12 @ Queen Mary University of London | |
13 @ | |
14 | |
15 .syntax unified | |
16 .arch armv7-a | |
17 .fpu neon | |
18 | |
@ void int16_to_float_audio(int numSamples, int16_t *inBuffer, float *outBuffer);
@
@ Convert 16-bit signed ints to floats between -1.0 and 1.0. Used for audio input.
@ Each sample is scaled by 1/32768, so the output lies in [-1.0, 32767/32768].
@
@ numSamples should be a multiple of 4 (i.e. with stereo audio, frames should be even).
@ It is rounded down to a multiple of 4; if the result is zero or negative the
@ function returns without touching either buffer.
@ inBuffer should be aligned on an 8-byte boundary.
@ outBuffer should be aligned on a 16-byte boundary.
@
@ Registers:
@    r0: numSamples     How many samples to convert (counted down by 4 per iteration)
@    r1: inBuffer       Buffer for input samples from ADC (post-incremented)
@    r2: outBuffer      Buffer to store output samples for render function (post-incremented)
@
@ Clobbers: r0-r2, d0, q1 (d2-d3), q2 (d4-d5), flags.

dIFAu_Input     .dn     D0.S16
qIFAu_Expanded  .qn     Q1.S32
qIFAu_Output    .qn     Q2.F32
dIFAu_Output_0  .dn     D4.F32
dIFAu_Output_1  .dn     D5.F32

    .align  2
    .global int16_to_float_audio
    .thumb
    .thumb_func
    .type   int16_to_float_audio, %function
int16_to_float_audio:
    bic     r0, r0, #3                                  @ Clip off the last two bits of numSamples (only multiples of 4 allowed)
    cmp     r0, #0                                      @ Check for trivial case: no samples (or a negative count)
    it      le
    bxle    lr                                          @ Return if that's the case

    @ ---- loop: iterate over the number of samples, four per pass ----
int16_to_float_audio_loop:
    vld1    {dIFAu_Input}, [r1]!                        @ Load four 16-bit signed ints from inBuffer++
    vmovl   qIFAu_Expanded, dIFAu_Input                 @ Sign-extend four 16-bit signed ints to 32-bit signed ints
    vcvt    qIFAu_Output, qIFAu_Expanded, #15           @ Convert four Q17.15 fixed points to floats (equiv. to / 32768)
    vst1    {dIFAu_Output_0, dIFAu_Output_1}, [r2]!     @ Store four floats to outBuffer++

    subs    r0, r0, #4                                  @ numSamples -= 4
    bgt     int16_to_float_audio_loop                   @ Loop while numSamples > 0 (conditional branch needs no IT block)

    bx      lr
    .size   int16_to_float_audio, . - int16_to_float_audio
62 | |
63 | |
64 | |
@ void int16_to_float_analog(int numSamples, uint16_t *inBuffer, float *outBuffer);
@
@ Convert 16-bit unsigned ints to floats between 0.0 and 1.0. Used for analog input.
@ Each sample is scaled by 1/65536, so the output lies in [0.0, 65535/65536].
@
@ numSamples should be a multiple of 4 (i.e. integer numFrames at 4 channels).
@ It is rounded down to a multiple of 4; if the result is zero or negative the
@ function returns without touching either buffer.
@ inBuffer should be aligned on an 8-byte boundary.
@ outBuffer should be aligned on a 16-byte boundary.
@
@ Registers:
@    r0: numSamples     How many samples to convert (counted down by 4 per iteration)
@    r1: inBuffer       Buffer for input samples from ADC (post-incremented)
@    r2: outBuffer      Buffer to store output samples for render function (post-incremented)
@
@ Clobbers: r0-r2, d0, q1 (d2-d3), q2 (d4-d5), flags.

dIFAn_Input     .dn     D0.U16
qIFAn_Expanded  .qn     Q1.U32
qIFAn_Output    .qn     Q2.F32
dIFAn_Output_0  .dn     D4.F32
dIFAn_Output_1  .dn     D5.F32

    .align  2
    .global int16_to_float_analog
    .thumb
    .thumb_func
    .type   int16_to_float_analog, %function
int16_to_float_analog:
    bic     r0, r0, #3                                  @ Clip off the last two bits of numSamples (only multiples of 4 allowed)
    cmp     r0, #0                                      @ Check for trivial case: no samples (or a negative count)
    it      le
    bxle    lr                                          @ Return if that's the case

    @ ---- loop: iterate over the number of samples, four per pass ----
int16_to_float_analog_loop:
    vld1    {dIFAn_Input}, [r1]!                        @ Load four 16-bit unsigned ints from inBuffer++
    vmovl   qIFAn_Expanded, dIFAn_Input                 @ Zero-extend four 16-bit unsigned ints to 32-bit unsigned ints
    vcvt    qIFAn_Output, qIFAn_Expanded, #16           @ Convert four Q16.16 fixed points to floats (equiv. to / 65536)
    vst1    {dIFAn_Output_0, dIFAn_Output_1}, [r2]!     @ Store four floats to outBuffer++

    subs    r0, r0, #4                                  @ numSamples -= 4
    bgt     int16_to_float_analog_loop                  @ Loop while numSamples > 0 (conditional branch needs no IT block)

    bx      lr
    .size   int16_to_float_analog, . - int16_to_float_analog
108 | |
109 | |
@ void float_to_int16_audio(int numSamples, float *inBuffer, int16_t *outBuffer);
@
@ Convert floats between -1.0 and 1.0 to 16-bit signed ints, with saturation.
@ Used for audio output.
@ Each sample is scaled by 32768, rounded toward zero, and clamped to
@ [-32768, 32767]; an input of exactly 1.0 therefore produces 32767.
@
@ numSamples should be a multiple of 4 (i.e. with stereo audio, frames should be even).
@ It is rounded down to a multiple of 4; if the result is zero or negative the
@ function returns without touching either buffer.
@ inBuffer should be aligned on a 16-byte boundary.
@ outBuffer should be aligned on an 8-byte boundary.
@
@ Registers:
@    r0: numSamples     How many samples to convert (counted down by 4 per iteration)
@    r1: inBuffer       Buffer for input samples from render function (post-incremented)
@    r2: outBuffer      Buffer to store output samples for DAC (post-incremented)
@
@ Clobbers: r0-r2, q0 (d0-d1), q1 (d2-d3), d4, flags.

qFIAu_Input     .qn     Q0.F32
dFIAu_Input_0   .dn     D0.F32
dFIAu_Input_1   .dn     D1.F32
qFIAu_Converted .qn     Q1.S32
dFIAu_Narrowed  .dn     D4.S16

    .align  2
    .global float_to_int16_audio
    .thumb
    .thumb_func
    .type   float_to_int16_audio, %function
float_to_int16_audio:
    bic     r0, r0, #3                                  @ Clip off the last two bits of numSamples (only multiples of 4 allowed)
    cmp     r0, #0                                      @ Check for trivial case: no samples (or a negative count)
    it      le
    bxle    lr                                          @ Return if that's the case

    @ ---- loop: iterate over the number of samples, four per pass ----
float_to_int16_audio_loop:
    vld1    {dFIAu_Input_0, dFIAu_Input_1}, [r1]!       @ Load four floats from inBuffer++
    vcvt    qFIAu_Converted, qFIAu_Input, #15           @ Convert four floats into four Q17.15 fixed points (equiv. to * 32768)
                                                        @ Rounds toward zero and saturates to the signed 32-bit range
    vqmovn  dFIAu_Narrowed, qFIAu_Converted             @ Narrow four 32-bit signed ints to 16-bit signed ints, with saturation
    vst1    {dFIAu_Narrowed}, [r2]!                     @ Store four ints to outBuffer++

    subs    r0, r0, #4                                  @ numSamples -= 4
    bgt     float_to_int16_audio_loop                   @ Loop while numSamples > 0 (conditional branch needs no IT block)

    bx      lr
    .size   float_to_int16_audio, . - float_to_int16_audio
156 | |
157 | |
@ void float_to_int16_analog(int numSamples, float *inBuffer, uint16_t *outBuffer);
@
@ Convert floats between 0.0 and 1.0 to 16-bit unsigned ints, with saturation.
@ Used for analog output.
@ Each sample is scaled by 65536, rounded toward zero, and clamped to
@ [0, 65535]; negative inputs produce 0 and an input of exactly 1.0 produces 65535.
@
@ numSamples should be a multiple of 4.
@ It is rounded down to a multiple of 4; if the result is zero or negative the
@ function returns without touching either buffer.
@ inBuffer should be aligned on a 16-byte boundary.
@ outBuffer should be aligned on an 8-byte boundary.
@
@ Registers:
@    r0: numSamples     How many samples to convert (counted down by 4 per iteration)
@    r1: inBuffer       Buffer for input samples from render function (post-incremented)
@    r2: outBuffer      Buffer to store output samples for DAC (post-incremented)
@
@ Clobbers: r0-r2, q0 (d0-d1), q1 (d2-d3), d4, flags.

qFIAn_Input     .qn     Q0.F32
dFIAn_Input_0   .dn     D0.F32
dFIAn_Input_1   .dn     D1.F32
qFIAn_Converted .qn     Q1.U32
dFIAn_Narrowed  .dn     D4.U16

    .align  2
    .global float_to_int16_analog
    .thumb
    .thumb_func
    .type   float_to_int16_analog, %function
float_to_int16_analog:
    bic     r0, r0, #3                                  @ Clip off the last two bits of numSamples (only multiples of 4 allowed)
    cmp     r0, #0                                      @ Check for trivial case: no samples (or a negative count)
    it      le
    bxle    lr                                          @ Return if that's the case

    @ ---- loop: iterate over the number of samples, four per pass ----
float_to_int16_analog_loop:
    vld1    {dFIAn_Input_0, dFIAn_Input_1}, [r1]!       @ Load four floats from inBuffer++
    vcvt    qFIAn_Converted, qFIAn_Input, #16           @ Convert four floats into four Q16.16 fixed points (equiv. to * 65536)
                                                        @ Rounds toward zero and saturates to the unsigned 32-bit range
    vqmovn  dFIAn_Narrowed, qFIAn_Converted             @ Narrow four 32-bit unsigned ints to 16-bit unsigned ints, with saturation
    vst1    {dFIAn_Narrowed}, [r2]!                     @ Store four ints to outBuffer++

    subs    r0, r0, #4                                  @ numSamples -= 4
    bgt     float_to_int16_analog_loop                  @ Loop while numSamples > 0 (conditional branch needs no IT block)

    bx      lr
    .size   float_to_int16_analog, . - float_to_int16_analog
204 |