/*
 * ARM NEON optimised Format Conversion Utils
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
yading@10
|
21
|
yading@10
|
22 #include "config.h"
|
yading@10
|
23 #include "libavutil/arm/asm.S"
|
yading@10
|
24
|
/*
 * ff_float_to_int16_neon
 *
 * Converts a run of 32-bit floats to 16-bit signed integers.
 *
 * Register use as seen in the code (the C prototype is not visible in
 * this file; presumably dst, src, len in that order -- confirm against
 * the caller):
 *   r0            output pointer, written with a :128 (16-byte) alignment hint
 *   r1            input pointer, read with a :128 (16-byte) alignment hint
 *   r2            element count
 *   ip            scratch loop counter
 *   q0-q3, q8-q9  scratch NEON registers
 *
 * Method: "vcvt.s32.f32 ..., #16" turns each float into signed 32-bit
 * fixed point with 16 fraction bits, and "vshrn.s32 ..., #16" keeps the
 * upper 16 bits of every lane, i.e. the integer part as an int16.  The
 * fixed-point conversion saturates to 32 bits, so out-of-range inputs
 * clamp instead of wrapping.
 *
 * The first 8 inputs are loaded unconditionally and all further work is
 * done in groups of 8 or 16, so the count is expected to be a non-zero
 * multiple of 8.  This is not checked.
 */
function ff_float_to_int16_neon, export=1
        subs            r2,  r2,  #8            @ r2 = count left after the preloaded 8
        vld1.64         {d0-d1},  [r1,:128]!
        vcvt.s32.f32    q8,  q0,  #16
        vld1.64         {d2-d3},  [r1,:128]!
        vcvt.s32.f32    q9,  q1,  #16
        beq             3f                      @ exactly 8 elements: just flush q8/q9
        bics            ip,  r2,  #15           @ ip = remaining count rounded down to 16
        beq             2f                      @ fewer than 16 remain: single tail pass
        @ Main loop: every pass writes 16 results (8 pending in q8/q9 plus
        @ 8 loaded here) and preloads the following 8 into q8/q9.
1:      subs            ip,  ip,  #16
        vshrn.s32       d4,  q8,  #16
        vld1.64         {d0-d1},  [r1,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vshrn.s32       d5,  q9,  #16
        vld1.64         {d2-d3},  [r1,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        vshrn.s32       d6,  q0,  #16
        vst1.64         {d4-d5},  [r0,:128]!
        vshrn.s32       d7,  q1,  #16
        vld1.64         {d16-d17},[r1,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vld1.64         {d18-d19},[r1,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vst1.64         {d6-d7},  [r0,:128]!
        bne             1b                      @ flags still come from the subs above
        ands            r2,  r2,  #15           @ anything beyond the pending 8?
        beq             3f
        @ Tail A: 16 results owed, 8 pending in q8/q9 and 8 more loaded here.
2:      vld1.64         {d0-d1},  [r1,:128]!
        vshrn.s32       d4,  q8,  #16
        vcvt.s32.f32    q0,  q0,  #16
        vld1.64         {d2-d3},  [r1,:128]!
        vshrn.s32       d5,  q9,  #16
        vcvt.s32.f32    q1,  q1,  #16
        vshrn.s32       d6,  q0,  #16
        vst1.64         {d4-d5},  [r0,:128]!
        vshrn.s32       d7,  q1,  #16
        vst1.64         {d6-d7},  [r0,:128]!
        bx              lr
        @ Tail B: only the 8 values already converted in q8/q9 remain.
3:      vshrn.s32       d4,  q8,  #16
        vshrn.s32       d5,  q9,  #16
        vst1.64         {d4-d5},  [r0,:128]!
        bx              lr
endfunc
yading@10
|
68
|
/*
 * ff_float_to_int16_interleave_neon
 *
 * Converts several planar float channels to int16 and writes them
 * interleaved (one sample of every channel per output frame).
 *
 * Register use as seen in the code (the C prototype is not visible in
 * this file; presumably dst, src, len, channels -- confirm against the
 * caller):
 *   r0  output pointer
 *   r1  pointer to an array of per-channel input pointers
 *   r2  number of samples per channel
 *   r3  number of channels
 *
 * Paths:
 *   channels < 2   loads the single input pointer and tail-calls
 *                  ff_float_to_int16_neon with r0/r2 untouched
 *   channels == 2  dedicated loop with contiguous 16-byte stores
 *   channels > 2   channels are consumed in groups of 4, then 2, then 1;
 *                  each group writes its samples with a stride of
 *                  ip = 2 * channels bytes (one output frame) and r0 is
 *                  advanced past the group afterwards
 *
 * Conversion trick: "vcvt.s32.f32 ..., #16" yields signed fixed point
 * with 16 fraction bits, so the int16 result is the upper halfword of
 * each 32-bit lane.  "vsri.32 b, a, #16" shifts a right by 16 and
 * inserts it under the upper halfword of b, giving one 32-bit lane that
 * holds sample a in its low halfword and sample b in its high halfword.
 *
 * All inputs are read with a :128 alignment hint.  The first samples of
 * every channel are loaded unconditionally and work proceeds in groups
 * of 8 or 16 samples, so the sample count is expected to be a non-zero
 * multiple of 8.  This is not checked.
 */
function ff_float_to_int16_interleave_neon, export=1
        cmp             r3,  #2
        itt             lt
        ldrlt           r1,  [r1]               @ fewer than 2 channels: fetch the only input pointer
        blt             ff_float_to_int16_neon  @ ... and tail-call the plain converter
        bne             4f                      @ more than 2 channels: generic path

        @ Exactly 2 channels: r3 = first channel, r1 = second channel.
        ldr             r3,  [r1]
        ldr             r1,  [r1, #4]

        subs            r2,  r2,  #8            @ r2 = samples left after the preloaded 8
        vld1.64         {d0-d1},  [r3,:128]!
        vcvt.s32.f32    q8,  q0,  #16
        vld1.64         {d2-d3},  [r3,:128]!
        vcvt.s32.f32    q9,  q1,  #16
        vld1.64         {d20-d21},[r1,:128]!
        vcvt.s32.f32    q10, q10, #16
        vld1.64         {d22-d23},[r1,:128]!
        vcvt.s32.f32    q11, q11, #16
        beq             3f                      @ exactly 8 samples: just flush
        bics            ip,  r2,  #15           @ ip = remaining samples rounded down to 16
        beq             2f
        @ Main loop: every pass writes 16 frames (8 pending in q8-q11 plus
        @ 8 loaded into q0/q1/q12/q13) and preloads the following 8.
1:      subs            ip,  ip,  #16
        vld1.64         {d0-d1},  [r3,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vsri.32         q10, q8,  #16
        vld1.64         {d2-d3},  [r3,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        vld1.64         {d24-d25},[r1,:128]!
        vcvt.s32.f32    q12, q12, #16
        vld1.64         {d26-d27},[r1,:128]!
        vsri.32         q11, q9,  #16
        vst1.64         {d20-d21},[r0,:128]!
        vcvt.s32.f32    q13, q13, #16
        vst1.64         {d22-d23},[r0,:128]!
        vsri.32         q12, q0,  #16
        vld1.64         {d16-d17},[r3,:128]!
        vsri.32         q13, q1,  #16
        vst1.64         {d24-d25},[r0,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vld1.64         {d18-d19},[r3,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vld1.64         {d20-d21},[r1,:128]!
        vcvt.s32.f32    q10, q10, #16
        vld1.64         {d22-d23},[r1,:128]!
        vcvt.s32.f32    q11, q11, #16
        vst1.64         {d26-d27},[r0,:128]!
        bne             1b                      @ flags still come from the subs above
        ands            r2,  r2,  #15           @ anything beyond the pending 8?
        beq             3f
        @ Tail A: 16 frames owed, 8 pending in q8-q11 and 8 more loaded here.
2:      vsri.32         q10, q8,  #16
        vld1.64         {d0-d1},  [r3,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vld1.64         {d2-d3},  [r3,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        vld1.64         {d24-d25},[r1,:128]!
        vcvt.s32.f32    q12, q12, #16
        vsri.32         q11, q9,  #16
        vld1.64         {d26-d27},[r1,:128]!
        vcvt.s32.f32    q13, q13, #16
        vst1.64         {d20-d21},[r0,:128]!
        vsri.32         q12, q0,  #16
        vst1.64         {d22-d23},[r0,:128]!
        vsri.32         q13, q1,  #16
        vst1.64         {d24-d27},[r0,:128]!
        bx              lr
        @ Tail B: only the 8 frames already converted in q8-q11 remain.
3:      vsri.32         q10, q8,  #16
        vsri.32         q11, q9,  #16
        vst1.64         {d20-d23},[r0,:128]!
        bx              lr

        @ Generic path for more than 2 channels.
        @ r4-r8 and lr are used as scratch below, hence the push.
4:      push            {r4-r8,lr}
        cmp             r3,  #4
        lsl             ip,  r3,  #1            @ ip = bytes per output frame (2 * channels)
        blt             4f                      @ fewer than 4 channels left: try a pair

        @ 4 channels
        @ r4-r7 = input pointers of this group, lr = samples left,
        @ r8 = output cursor stepping one frame (ip bytes) per store.
5:      ldmia           r1!, {r4-r7}
        mov             lr,  r2
        mov             r8,  r0
        vld1.64         {d16-d17},[r4,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vld1.64         {d18-d19},[r5,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vld1.64         {d20-d21},[r6,:128]!
        vcvt.s32.f32    q10, q10, #16
        vld1.64         {d22-d23},[r7,:128]!
        vcvt.s32.f32    q11, q11, #16
        @ Each pass handles 8 samples per channel: 4 pending in q8-q11
        @ and 4 loaded into q0-q3.  vsri pairs channels 0/1 and 2/3,
        @ vzip.32 then places both pairs of one sample in one d register,
        @ which is stored as a single 8-byte group.
6:      subs            lr,  lr,  #8
        vld1.64         {d0-d1},  [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vsri.32         q9,  q8,  #16
        vld1.64         {d2-d3},  [r5,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        vsri.32         q11, q10, #16
        vld1.64         {d4-d5},  [r6,:128]!
        vcvt.s32.f32    q2,  q2,  #16
        vzip.32         d18, d22
        vld1.64         {d6-d7},  [r7,:128]!
        vcvt.s32.f32    q3,  q3,  #16
        vzip.32         d19, d23
        vst1.64         {d18},    [r8], ip
        vsri.32         q1,  q0,  #16
        vst1.64         {d22},    [r8], ip
        vsri.32         q3,  q2,  #16
        vst1.64         {d19},    [r8], ip
        vzip.32         d2,  d6
        vst1.64         {d23},    [r8], ip
        vzip.32         d3,  d7
        beq             7f                      @ last pass: flush q1/q3 without reloading
        vld1.64         {d16-d17},[r4,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vst1.64         {d2},     [r8], ip
        vld1.64         {d18-d19},[r5,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vst1.64         {d6},     [r8], ip
        vld1.64         {d20-d21},[r6,:128]!
        vcvt.s32.f32    q10, q10, #16
        vst1.64         {d3},     [r8], ip
        vld1.64         {d22-d23},[r7,:128]!
        vcvt.s32.f32    q11, q11, #16
        vst1.64         {d7},     [r8], ip
        b               6b
7:      vst1.64         {d2},     [r8], ip
        vst1.64         {d6},     [r8], ip
        vst1.64         {d3},     [r8], ip
        vst1.64         {d7},     [r8], ip
        subs            r3,  r3,  #4            @ four channels done
        it              eq
        popeq           {r4-r8,pc}              @ nothing left: return
        cmp             r3,  #4
        add             r0,  r0,  #8            @ skip the 4 int16 slots just filled in frame 0
        bge             5b                      @ another full group of four

        @ 2 channels
        @ r4/r5 = input pointers, lr = samples left, r8 = output cursor.
        @ Each frame of this pair is one 32-bit lane stored with stride ip.
4:      cmp             r3,  #2
        blt             4f                      @ a single channel is left
        ldmia           r1!, {r4-r5}
        mov             lr,  r2
        mov             r8,  r0
        tst             lr,  #8                 @ Z clear when the count is an odd multiple of 8
        vld1.64         {d16-d17},[r4,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vld1.64         {d18-d19},[r5,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vld1.64         {d20-d21},[r4,:128]!
        vcvt.s32.f32    q10, q10, #16
        vld1.64         {d22-d23},[r5,:128]!
        vcvt.s32.f32    q11, q11, #16
        beq             6f                      @ multiple of 16: straight to the main loop
        subs            lr,  lr,  #8
        beq             7f                      @ exactly 8 samples: flush and finish
        @ Odd leading group of 8: write it and preload the next 8.
        vsri.32         d18, d16, #16
        vsri.32         d19, d17, #16
        vld1.64         {d16-d17},[r4,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vst1.32         {d18[0]}, [r8], ip
        vsri.32         d22, d20, #16
        vst1.32         {d18[1]}, [r8], ip
        vsri.32         d23, d21, #16
        vst1.32         {d19[0]}, [r8], ip
        vst1.32         {d19[1]}, [r8], ip
        vld1.64         {d18-d19},[r5,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vst1.32         {d22[0]}, [r8], ip
        vst1.32         {d22[1]}, [r8], ip
        vld1.64         {d20-d21},[r4,:128]!
        vcvt.s32.f32    q10, q10, #16
        vst1.32         {d23[0]}, [r8], ip
        vst1.32         {d23[1]}, [r8], ip
        vld1.64         {d22-d23},[r5,:128]!
        vcvt.s32.f32    q11, q11, #16
        @ Main loop: 16 frames per pass, 8 pending in q8-q11 plus 8
        @ loaded into q0-q3.
6:      subs            lr,  lr,  #16
        vld1.64         {d0-d1},  [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vsri.32         d18, d16, #16
        vld1.64         {d2-d3},  [r5,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        vsri.32         d19, d17, #16
        vld1.64         {d4-d5},  [r4,:128]!
        vcvt.s32.f32    q2,  q2,  #16
        vld1.64         {d6-d7},  [r5,:128]!
        vcvt.s32.f32    q3,  q3,  #16
        vst1.32         {d18[0]}, [r8], ip
        vsri.32         d22, d20, #16
        vst1.32         {d18[1]}, [r8], ip
        vsri.32         d23, d21, #16
        vst1.32         {d19[0]}, [r8], ip
        vsri.32         d2,  d0,  #16
        vst1.32         {d19[1]}, [r8], ip
        vsri.32         d3,  d1,  #16
        vst1.32         {d22[0]}, [r8], ip
        vsri.32         d6,  d4,  #16
        vst1.32         {d22[1]}, [r8], ip
        vsri.32         d7,  d5,  #16
        vst1.32         {d23[0]}, [r8], ip
        vst1.32         {d23[1]}, [r8], ip
        beq             6f                      @ last pass: flush q1/q3 without reloading
        vld1.64         {d16-d17},[r4,:128]!
        vcvt.s32.f32    q8,  q8,  #16
        vst1.32         {d2[0]},  [r8], ip
        vst1.32         {d2[1]},  [r8], ip
        vld1.64         {d18-d19},[r5,:128]!
        vcvt.s32.f32    q9,  q9,  #16
        vst1.32         {d3[0]},  [r8], ip
        vst1.32         {d3[1]},  [r8], ip
        vld1.64         {d20-d21},[r4,:128]!
        vcvt.s32.f32    q10, q10, #16
        vst1.32         {d6[0]},  [r8], ip
        vst1.32         {d6[1]},  [r8], ip
        vld1.64         {d22-d23},[r5,:128]!
        vcvt.s32.f32    q11, q11, #16
        vst1.32         {d7[0]},  [r8], ip
        vst1.32         {d7[1]},  [r8], ip
        bgt             6b
        @ Final flush of the 8 frames held in d2/d3/d6/d7.
6:      vst1.32         {d2[0]},  [r8], ip
        vst1.32         {d2[1]},  [r8], ip
        vst1.32         {d3[0]},  [r8], ip
        vst1.32         {d3[1]},  [r8], ip
        vst1.32         {d6[0]},  [r8], ip
        vst1.32         {d6[1]},  [r8], ip
        vst1.32         {d7[0]},  [r8], ip
        vst1.32         {d7[1]},  [r8], ip
        b               8f
        @ Count was exactly 8: flush the preloaded group.
7:      vsri.32         d18, d16, #16
        vsri.32         d19, d17, #16
        vst1.32         {d18[0]}, [r8], ip
        vsri.32         d22, d20, #16
        vst1.32         {d18[1]}, [r8], ip
        vsri.32         d23, d21, #16
        vst1.32         {d19[0]}, [r8], ip
        vst1.32         {d19[1]}, [r8], ip
        vst1.32         {d22[0]}, [r8], ip
        vst1.32         {d22[1]}, [r8], ip
        vst1.32         {d23[0]}, [r8], ip
        vst1.32         {d23[1]}, [r8], ip
8:      subs            r3,  r3,  #2            @ two channels done
        add             r0,  r0,  #4            @ skip the 2 int16 slots just filled in frame 0
        it              eq
        popeq           {r4-r8,pc}              @ nothing left: return

        @ 1 channel
        @ r4 = input pointer, lr = samples left, r5 = output cursor.
        @ The int16 result is the upper halfword of each 32-bit lane,
        @ i.e. the odd 16-bit lanes, stored one per frame with stride ip.
4:      ldr             r4,  [r1], #4
        tst             r2,  #8                 @ Z clear when the count is an odd multiple of 8
        mov             lr,  r2
        mov             r5,  r0
        vld1.64         {d0-d1},  [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vld1.64         {d2-d3},  [r4,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        bne             8f                      @ handle the odd group of 8 first
        @ Main loop: 16 samples per pass, 8 pending in q0/q1 plus 8
        @ loaded into q2/q3.
6:      subs            lr,  lr,  #16
        vld1.64         {d4-d5},  [r4,:128]!
        vcvt.s32.f32    q2,  q2,  #16
        vld1.64         {d6-d7},  [r4,:128]!
        vcvt.s32.f32    q3,  q3,  #16
        vst1.16         {d0[1]},  [r5,:16], ip
        vst1.16         {d0[3]},  [r5,:16], ip
        vst1.16         {d1[1]},  [r5,:16], ip
        vst1.16         {d1[3]},  [r5,:16], ip
        vst1.16         {d2[1]},  [r5,:16], ip
        vst1.16         {d2[3]},  [r5,:16], ip
        vst1.16         {d3[1]},  [r5,:16], ip
        vst1.16         {d3[3]},  [r5,:16], ip
        beq             7f                      @ last pass: skip the preload
        vld1.64         {d0-d1},  [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vld1.64         {d2-d3},  [r4,:128]!
        vcvt.s32.f32    q1,  q1,  #16
7:      vst1.16         {d4[1]},  [r5,:16], ip
        vst1.16         {d4[3]},  [r5,:16], ip
        vst1.16         {d5[1]},  [r5,:16], ip
        vst1.16         {d5[3]},  [r5,:16], ip
        vst1.16         {d6[1]},  [r5,:16], ip
        vst1.16         {d6[3]},  [r5,:16], ip
        vst1.16         {d7[1]},  [r5,:16], ip
        vst1.16         {d7[3]},  [r5,:16], ip
        bgt             6b                      @ flags still come from the subs at 6:
        pop             {r4-r8,pc}
        @ Odd leading group of 8: write it, then join the main loop.
8:      subs            lr,  lr,  #8
        vst1.16         {d0[1]},  [r5,:16], ip
        vst1.16         {d0[3]},  [r5,:16], ip
        vst1.16         {d1[1]},  [r5,:16], ip
        vst1.16         {d1[3]},  [r5,:16], ip
        vst1.16         {d2[1]},  [r5,:16], ip
        vst1.16         {d2[3]},  [r5,:16], ip
        vst1.16         {d3[1]},  [r5,:16], ip
        vst1.16         {d3[3]},  [r5,:16], ip
        it              eq
        popeq           {r4-r8,pc}              @ count was exactly 8: return
        vld1.64         {d0-d1},  [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #16
        vld1.64         {d2-d3},  [r4,:128]!
        vcvt.s32.f32    q1,  q1,  #16
        b               6b
endfunc
yading@10
|
365
|
/*
 * ff_int32_to_float_fmul_scalar_neon
 *
 * Converts 32-bit signed integers to floats and multiplies every result
 * by one scalar factor.
 *
 * Register use as seen in the code (the C prototype is not visible in
 * this file -- confirm against the caller):
 *   r0   output pointer, written with a :128 (16-byte) alignment hint
 *   r1   input pointer, read with a :128 (16-byte) alignment hint
 *   q0   the scalar factor, broadcast to all four lanes
 *   len  element count, an alias set up by the prefixed lines below
 *
 * VFP and NOVFP are not defined in this file (presumably they come from
 * libavutil/arm/asm.S and keep exactly one of each pair of lines,
 * depending on the floating-point calling convention -- TODO confirm):
 *   VFP lines    factor taken from d0[0], count in r2
 *   NOVFP lines  factor bits taken from r2, count in r3
 *
 * The first 8 inputs are loaded unconditionally and the loop only ends
 * when the count reaches exactly zero after subtracting 8 per pass, so
 * the count is expected to be a non-zero multiple of 8.  This is not
 * checked.
 */
function ff_int32_to_float_fmul_scalar_neon, export=1
VFP     vdup.32         q0,  d0[0]
VFP     len     .req    r2
NOVFP   vdup.32         q0,  r2
NOVFP   len     .req    r3

        @ Preload and convert the first 8 integers.
        vld1.32         {q1},     [r1,:128]!
        vcvt.f32.s32    q3,  q1
        vld1.32         {q2},     [r1,:128]!
        vcvt.f32.s32    q8,  q2
        @ Each pass scales the 8 pending values, then (unless it is the
        @ last pass) fetches the next 8 before storing the results.
1:      subs            len, len, #8
        pld             [r1, #16]               @ prefetch hint for upcoming input
        vmul.f32        q9,  q3,  q0
        vmul.f32        q10, q8,  q0
        beq             2f                      @ last group: store and return
        vld1.32         {q1},     [r1,:128]!
        vcvt.f32.s32    q3,  q1
        vld1.32         {q2},     [r1,:128]!
        vcvt.f32.s32    q8,  q2
        vst1.32         {q9},     [r0,:128]!
        vst1.32         {q10},    [r0,:128]!
        b               1b
2:      vst1.32         {q9},     [r0,:128]!
        vst1.32         {q10},    [r0,:128]!
        bx              lr
        .unreq  len                             @ drop the alias so the name can be reused
endfunc