yading@10
|
/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
yading@10
|
20
|
yading@10
|
21 #include "libavutil/arm/asm.S"
|
yading@10
|
22 #include "neon.S"
|
yading@10
|
23
|
yading@10
|
/* H.264 qpel MC */
|
yading@10
|
25
|
yading@10
|
/*
 * lowpass_const r
 *
 * Load the two filter multipliers into d6, using core register r as
 * scratch: r = (20 << 16) | 5, so the 16-bit lanes become d6[0] = 5 and
 * d6[1] = 20.  Only the low 32 bits of d6 are written; r is clobbered.
 */
.macro  lowpass_const   r
        movw            \r,  #5
        movt            \r,  #20
        vmov.32         d6[0], \r
.endm
|
yading@10
|
31
|
yading@10
|
/*
 * lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
 *
 * Apply the 6-tap filter (1, -5, 20, 20, -5, 1) to two independent rows
 * of bytes.  Row A is the 16-byte window r0:r1, row B is r2:r3 (all D
 * registers).  For each of the 8 output positions i:
 *
 *   sum[i] = (x[i] + x[i+5]) + 20 * (x[i+2] + x[i+3]) - 5 * (x[i+1] + x[i+4])
 *
 * The multipliers are read from d6[0] (5) and d6[1] (20), as set up by
 * lowpass_const.
 *
 * narrow=1: d0 and d1 are D registers; the 16-bit sums are rounded,
 *           shifted right by 5 and saturated to unsigned bytes.
 * narrow=0: d0 and d1 are Q registers and receive the raw 16-bit sums.
 *
 * Clobbers q1, q2, q9, q10, d30, d31, plus q0 and q8 when narrow=1.
 * The two rows are interleaved instruction by instruction.
 */
.macro  lowpass_8       r0,  r1,  r2,  r3,  d0,  d1,  narrow=1
  .if \narrow
        t0 .req q0
        t1 .req q8
  .else
        t0 .req \d0
        t1 .req \d1
  .endif
        vext.8          d2,  \r0, \r1, #2
        vext.8          d3,  \r0, \r1, #3
        vaddl.u8        q1,  d2,  d3            @ A: x[i+2] + x[i+3]
        vext.8          d4,  \r0, \r1, #1
        vext.8          d5,  \r0, \r1, #4
        vaddl.u8        q2,  d4,  d5            @ A: x[i+1] + x[i+4]
        vext.8          d30, \r0, \r1, #5
        vaddl.u8        t0,  \r0, d30           @ A: x[i] + x[i+5]
        vext.8          d18, \r2, \r3, #2
        vmla.i16        t0,  q1,  d6[1]         @ A: += 20 * centre pair
        vext.8          d19, \r2, \r3, #3
        vaddl.u8        q9,  d18, d19           @ B: x[i+2] + x[i+3]
        vext.8          d20, \r2, \r3, #1
        vmls.i16        t0,  q2,  d6[0]         @ A: -= 5 * inner pair
        vext.8          d21, \r2, \r3, #4
        vaddl.u8        q10, d20, d21           @ B: x[i+1] + x[i+4]
        vext.8          d31, \r2, \r3, #5
        vaddl.u8        t1,  \r2, d31           @ B: x[i] + x[i+5]
        vmla.i16        t1,  q9,  d6[1]
        vmls.i16        t1,  q10, d6[0]
  .if \narrow
        vqrshrun.s16    \d0, t0,  #5
        vqrshrun.s16    \d1, t1,  #5
  .endif
        .unreq  t0
        .unreq  t1
.endm
|
yading@10
|
67
|
yading@10
|
/*
 * lowpass_8_1 r0, r1, d0, narrow=1
 *
 * Single-row variant of lowpass_8: filter the 16-byte window r0:r1 with
 * the taps (1, -5, 20, 20, -5, 1), multipliers taken from d6[0] (5) and
 * d6[1] (20).
 *
 * narrow=1: d0 is a D register receiving the sums rounded, shifted right
 *           by 5 and saturated to unsigned bytes.
 * narrow=0: d0 is a Q register receiving the raw 16-bit sums.
 *
 * Clobbers q1, q2, d30, plus q0 when narrow=1.
 */
.macro  lowpass_8_1     r0,  r1,  d0,  narrow=1
  .if \narrow
        t0 .req q0
  .else
        t0 .req \d0
  .endif
        vext.8          d2,  \r0, \r1, #2
        vext.8          d3,  \r0, \r1, #3
        vaddl.u8        q1,  d2,  d3            @ x[i+2] + x[i+3]
        vext.8          d4,  \r0, \r1, #1
        vext.8          d5,  \r0, \r1, #4
        vaddl.u8        q2,  d4,  d5            @ x[i+1] + x[i+4]
        vext.8          d30, \r0, \r1, #5
        vaddl.u8        t0,  \r0, d30           @ x[i] + x[i+5]
        vmla.i16        t0,  q1,  d6[1]
        vmls.i16        t0,  q2,  d6[0]
  .if \narrow
        vqrshrun.s16    \d0, t0,  #5
  .endif
        .unreq  t0
.endm
|
yading@10
|
89
|
yading@10
|
/*
 * lowpass_8.16 r0, r1, l0, h0, l1, h1, d
 *
 * Second-stage 6-tap filter on signed 16-bit intermediates, accumulated
 * in 32 bits.  r0:r1 (Q registers) form a window of 16 halfwords; the
 * callers in this file pass l0/h0 and l1/h1 as the low/high D halves of
 * r0 and r1.  For each of the 8 outputs:
 *
 *   sum = (x[i] + x[i+5]) + 20 * (x[i+2] + x[i+3]) - 5 * (x[i+1] + x[i+4])
 *
 * The multiplications are done with shifts (20*v = 16*v + 4*v,
 * 5*v = v + 4*v).  The sum is rounded and shifted right by 10, then
 * saturated to unsigned bytes in D register d.
 *
 * Clobbers q0-q3, q8-q10, q15 and r1 (overwritten with the window shifted
 * by five elements).  Because q3 is overwritten, the constants that
 * lowpass_const placed in d6 are lost.
 */
.macro  lowpass_8.16    r0,  r1,  l0,  h0,  l1,  h1,  d
        vext.16         q1,  \r0, \r1, #2
        vext.16         q0,  \r0, \r1, #3
        vaddl.s16       q9,  d2,  d0            @ low 4:  x[i+2] + x[i+3]
        vext.16         q2,  \r0, \r1, #1
        vaddl.s16       q1,  d3,  d1            @ high 4: x[i+2] + x[i+3]
        vext.16         q3,  \r0, \r1, #4
        vaddl.s16       q10, d4,  d6            @ low 4:  x[i+1] + x[i+4]
        vext.16         \r1, \r0, \r1, #5
        vaddl.s16       q2,  d5,  d7            @ high 4: x[i+1] + x[i+4]
        vaddl.s16       q0,  \h0, \h1           @ high 4: x[i] + x[i+5]
        vaddl.s16       q8,  \l0, \l1           @ low 4:  x[i] + x[i+5]

        vshl.i32        q3,  q9,  #4
        vshl.i32        q9,  q9,  #2
        vshl.i32        q15, q10, #2
        vadd.i32        q9,  q9,  q3            @ 20 * centre pair (low)
        vadd.i32        q10, q10, q15           @  5 * inner pair  (low)

        vshl.i32        q3,  q1,  #4
        vshl.i32        q1,  q1,  #2
        vshl.i32        q15, q2,  #2
        vadd.i32        q1,  q1,  q3            @ 20 * centre pair (high)
        vadd.i32        q2,  q2,  q15           @  5 * inner pair  (high)

        vadd.i32        q9,  q9,  q8
        vsub.i32        q9,  q9,  q10

        vadd.i32        q1,  q1,  q0
        vsub.i32        q1,  q1,  q2

        vrshrn.s32      d18, q9,  #10
        vrshrn.s32      d19, q1,  #10

        vqmovun.s16     \d,  q9
.endm
|
yading@10
|
126
|
yading@10
|
/*
 * put_h264_qpel16_h_lowpass_neon_packed
 *
 * Horizontal filter of a 16-column, 16-row area, done as two 8-column
 * passes whose outputs are written back to back through r0 with a fixed
 * destination stride of 8.
 *
 * In:   r0 destination, r1 source, r2 source stride
 * Uses: r4 to hold the return address (the caller must have saved r4),
 *       r3 and r12 as arguments of the qpel8 routine.
 * r0 is not rewound between the passes, so the second half follows the
 * first in memory.
 */
function put_h264_qpel16_h_lowpass_neon_packed
        mov             r4,  lr
        mov             r12, #16                @ 16 rows
        mov             r3,  #8                 @ packed destination stride
        bl              put_h264_qpel8_h_lowpass_neon
        sub             r1,  r1,  r2, lsl #4    @ rewind source by 16 rows
        add             r1,  r1,  #8            @ right-hand 8 columns
        mov             r12, #16
        mov             lr,  r4
        b               put_h264_qpel8_h_lowpass_neon   @ tail call
endfunc
|
yading@10
|
138
|
yading@10
|
/*
 * h264_qpel_h_lowpass type
 *
 * Emit the horizontal 6-tap filter routines for type = put or avg.
 *
 * Register use, as read from the code:
 *   r0   destination pointer (accessed with a 64-bit alignment hint)
 *   r1   source pointer; 16 bytes are read per row
 *   r2   source stride
 *   r3   destination stride
 *   r12  row count for the qpel8 routine (two rows per iteration)
 *   d6   filter constants, expected to come from lowpass_const
 * For type = avg the filtered bytes are combined with the bytes already
 * at the destination using a rounding halving add before being stored.
 */
.macro  h264_qpel_h_lowpass type
function \type\()_h264_qpel16_h_lowpass_neon
        push            {lr}
        mov             r12, #16
        bl              \type\()_h264_qpel8_h_lowpass_neon
        sub             r0,  r0,  r3, lsl #4    @ rewind destination by 16 rows
        sub             r1,  r1,  r2, lsl #4    @ rewind source by 16 rows
        add             r0,  r0,  #8            @ move to the right-hand 8 columns
        add             r1,  r1,  #8
        mov             r12, #16
        pop             {lr}
        @ No return instruction here: execution is meant to continue into
        @ the qpel8 routine emitted directly below for the right half.
endfunc

function \type\()_h264_qpel8_h_lowpass_neon
1:      vld1.8          {d0, d1},  [r1], r2
        vld1.8          {d16,d17}, [r1], r2
        subs            r12, r12, #2            @ flags consumed by the bne below
        lowpass_8       d0,  d1,  d16, d17, d0,  d16
  .ifc \type,avg
        vld1.8          {d2},      [r0,:64], r3
        vrhadd.u8       d0,  d0,  d2
        vld1.8          {d3},      [r0,:64]
        vrhadd.u8       d16, d16, d3
        sub             r0,  r0,  r3            @ back to the first of the two rows
  .endif
        vst1.8          {d0},      [r0,:64], r3
        vst1.8          {d16},     [r0,:64], r3
        bne             1b
        bx              lr
endfunc
.endm

        h264_qpel_h_lowpass put
        h264_qpel_h_lowpass avg
|
yading@10
|
173
|
yading@10
|
/*
 * h264_qpel_h_lowpass_l2 type
 *
 * Emit the horizontal filter routines that additionally average the
 * filtered row with a second 8-byte source row (type = put or avg).
 *
 * Register use, as read from the code:
 *   r0   destination pointer (64-bit alignment hint)
 *   r1   source pointer for the filter; 16 bytes are read per row
 *   r3   second source pointer; 8 bytes are read per row
 *   r2   stride, used for r0, r1 and r3 alike
 *   r12  row count for the qpel8 routine (two rows per iteration)
 *   d6   filter constants, expected to come from lowpass_const
 * For type = avg the result is further combined with the existing
 * destination bytes using a rounding halving add.
 */
.macro  h264_qpel_h_lowpass_l2 type
function \type\()_h264_qpel16_h_lowpass_l2_neon
        push            {lr}
        mov             r12, #16
        bl              \type\()_h264_qpel8_h_lowpass_l2_neon
        sub             r0,  r0,  r2, lsl #4    @ rewind all three pointers by 16 rows
        sub             r1,  r1,  r2, lsl #4
        sub             r3,  r3,  r2, lsl #4
        add             r0,  r0,  #8            @ then move 8 columns to the right
        add             r1,  r1,  #8
        add             r3,  r3,  #8
        mov             r12, #16
        pop             {lr}
        @ No return instruction here: execution is meant to continue into
        @ the qpel8 routine emitted directly below for the right half.
endfunc

function \type\()_h264_qpel8_h_lowpass_l2_neon
1:      vld1.8          {d0, d1},  [r1], r2
        vld1.8          {d16,d17}, [r1], r2
        vld1.8          {d28},     [r3], r2
        vld1.8          {d29},     [r3], r2
        subs            r12, r12, #2            @ flags consumed by the bne below
        lowpass_8       d0,  d1,  d16, d17, d0,  d1
        vrhadd.u8       q0,  q0,  q14           @ average with the second source
  .ifc \type,avg
        vld1.8          {d2},      [r0,:64], r2
        vrhadd.u8       d0,  d0,  d2
        vld1.8          {d3},      [r0,:64]
        vrhadd.u8       d1,  d1,  d3
        sub             r0,  r0,  r2            @ back to the first of the two rows
  .endif
        vst1.8          {d0},      [r0,:64], r2
        vst1.8          {d1},      [r0,:64], r2
        bne             1b
        bx              lr
endfunc
.endm

        h264_qpel_h_lowpass_l2 put
        h264_qpel_h_lowpass_l2 avg
|
yading@10
|
213
|
yading@10
|
/*
 * put_h264_qpel16_v_lowpass_neon_packed
 *
 * Vertical filter of a 16x16 area as four 8x8 passes (left-top,
 * left-bottom, right-top, right-bottom).  The destination stride is fixed
 * to 8 and r0 is never rewound, so the four 8x8 results are written back
 * to back through r0.
 *
 * In:   r0 destination, r1 source, r3 source stride
 * Uses: r4 to hold the return address (the caller must have saved r4),
 *       r2 as the destination stride of the qpel8 routine.
 * Each qpel8 call leaves r1 advanced by 12 rows, hence the 4-row
 * corrections between the calls.
 */
function put_h264_qpel16_v_lowpass_neon_packed
        mov             r4,  lr
        mov             r2,  #8                 @ packed destination stride
        bl              put_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-left block
        bl              put_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #4    @ back to the first row ...
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ ... of the right-hand 8 columns
        bl              put_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-right block
        mov             lr,  r4
        b               put_h264_qpel8_v_lowpass_neon   @ tail call
endfunc
|
yading@10
|
228
|
yading@10
|
/*
 * h264_qpel_v_lowpass type
 *
 * Emit the vertical 6-tap filter routines for type = put or avg.
 *
 * Register use, as read from the code:
 *   r0  destination pointer (64-bit alignment hint), advanced by 8 rows
 *   r1  source pointer; 13 rows of 8 bytes are read, leaving r1 advanced
 *       by 12 rows
 *   r2  destination stride
 *   r3  source stride
 *   r4  holds the return address in the qpel16 routine
 *   d6  filter constants, expected to come from lowpass_const
 * The qpel8 routine writes d8-d15 without saving them; the exported entry
 * points in this file wrap the call in vpush/vpop {d8-d15}.
 *
 * The rows are transposed (transpose_8x8 comes from neon.S, presumably an
 * 8x8 byte transpose), filtered with the row-wise lowpass_8 macro and
 * transposed back, which filters along the original vertical direction.
 */
.macro  h264_qpel_v_lowpass type
function \type\()_h264_qpel16_v_lowpass_neon
        mov             r4,  lr
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-left block
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             r0,  r0,  r2, lsl #4    @ destination: back 16 rows,
        add             r0,  r0,  #8            @ right-hand 8 columns
        sub             r1,  r1,  r3, lsl #4    @ source: back to the first row,
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ right-hand 8 columns
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-right block
        mov             lr,  r4
        @ No return instruction here: execution is meant to continue into
        @ the qpel8 routine emitted directly below for the last block.
endfunc

function \type\()_h264_qpel8_v_lowpass_neon
        vld1.8          {d8},  [r1], r3
        vld1.8          {d10}, [r1], r3
        vld1.8          {d12}, [r1], r3
        vld1.8          {d14}, [r1], r3
        vld1.8          {d22}, [r1], r3
        vld1.8          {d24}, [r1], r3
        vld1.8          {d26}, [r1], r3
        vld1.8          {d28}, [r1], r3
        vld1.8          {d9},  [r1], r3
        vld1.8          {d11}, [r1], r3
        vld1.8          {d13}, [r1], r3
        vld1.8          {d15}, [r1], r3
        vld1.8          {d23}, [r1]

        transpose_8x8   q4,  q5,  q6,  q7,  q11, q12, q13, q14
        lowpass_8       d8,  d9,  d10, d11, d8,  d10
        lowpass_8       d12, d13, d14, d15, d12, d14
        lowpass_8       d22, d23, d24, d25, d22, d24
        lowpass_8       d26, d27, d28, d29, d26, d28
        transpose_8x8   d8,  d10, d12, d14, d22, d24, d26, d28

  .ifc \type,avg
        vld1.8          {d9},  [r0,:64], r2
        vrhadd.u8       d8,  d8,  d9
        vld1.8          {d11}, [r0,:64], r2
        vrhadd.u8       d10, d10, d11
        vld1.8          {d13}, [r0,:64], r2
        vrhadd.u8       d12, d12, d13
        vld1.8          {d15}, [r0,:64], r2
        vrhadd.u8       d14, d14, d15
        vld1.8          {d23}, [r0,:64], r2
        vrhadd.u8       d22, d22, d23
        vld1.8          {d25}, [r0,:64], r2
        vrhadd.u8       d24, d24, d25
        vld1.8          {d27}, [r0,:64], r2
        vrhadd.u8       d26, d26, d27
        vld1.8          {d29}, [r0,:64], r2
        vrhadd.u8       d28, d28, d29
        sub             r0,  r0,  r2, lsl #3    @ rewind the 8 rows just read
  .endif

        vst1.8          {d8},  [r0,:64], r2
        vst1.8          {d10}, [r0,:64], r2
        vst1.8          {d12}, [r0,:64], r2
        vst1.8          {d14}, [r0,:64], r2
        vst1.8          {d22}, [r0,:64], r2
        vst1.8          {d24}, [r0,:64], r2
        vst1.8          {d26}, [r0,:64], r2
        vst1.8          {d28}, [r0,:64], r2

        bx              lr
endfunc
.endm

        h264_qpel_v_lowpass put
        h264_qpel_v_lowpass avg
|
yading@10
|
302
|
yading@10
|
/*
 * h264_qpel_v_lowpass_l2 type
 *
 * Emit the vertical filter routines that additionally average the
 * filtered block with a second 8x8 source (type = put or avg).
 *
 * Register use, as read from the code:
 *   r0   destination pointer (64-bit alignment hint), stride r3
 *   r1   source pointer for the filter, stride r3; 13 rows are read,
 *        leaving r1 advanced by 12 rows
 *   r12  second source pointer, stride r2; 8 rows of 8 bytes are read
 *   r4   holds the return address in the qpel16 routine
 *   d6   filter constants, expected to come from lowpass_const
 * The qpel8 routine writes d8-d15 without saving them; the exported entry
 * points in this file wrap the call in vpush/vpop {d8-d15}.
 * For type = avg the result is further combined with the existing
 * destination bytes using a rounding halving add.
 */
.macro  h264_qpel_v_lowpass_l2 type
function \type\()_h264_qpel16_v_lowpass_l2_neon
        mov             r4,  lr
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-left block
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             r0,  r0,  r3, lsl #4    @ destination back 16 rows
        sub             r12, r12, r2, lsl #4    @ second source back 16 rows
        add             r0,  r0,  #8            @ both to the right-hand 8 columns
        add             r12, r12, #8
        sub             r1,  r1,  r3, lsl #4    @ source: back to the first row,
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ right-hand 8 columns
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-right block
        mov             lr,  r4
        @ No return instruction here: execution is meant to continue into
        @ the qpel8 routine emitted directly below for the last block.
endfunc

function \type\()_h264_qpel8_v_lowpass_l2_neon
        vld1.8          {d8},  [r1], r3
        vld1.8          {d10}, [r1], r3
        vld1.8          {d12}, [r1], r3
        vld1.8          {d14}, [r1], r3
        vld1.8          {d22}, [r1], r3
        vld1.8          {d24}, [r1], r3
        vld1.8          {d26}, [r1], r3
        vld1.8          {d28}, [r1], r3
        vld1.8          {d9},  [r1], r3
        vld1.8          {d11}, [r1], r3
        vld1.8          {d13}, [r1], r3
        vld1.8          {d15}, [r1], r3
        vld1.8          {d23}, [r1]

        transpose_8x8   q4,  q5,  q6,  q7,  q11, q12, q13, q14
        lowpass_8       d8,  d9,  d10, d11, d8,  d9
        lowpass_8       d12, d13, d14, d15, d12, d13
        lowpass_8       d22, d23, d24, d25, d22, d23
        lowpass_8       d26, d27, d28, d29, d26, d27
        transpose_8x8   d8,  d9,  d12, d13, d22, d23, d26, d27

        @ Average the filtered rows (q4, q6, q11, q13) with the second source.
        vld1.8          {d0},  [r12], r2
        vld1.8          {d1},  [r12], r2
        vld1.8          {d2},  [r12], r2
        vld1.8          {d3},  [r12], r2
        vld1.8          {d4},  [r12], r2
        vrhadd.u8       q0,  q0,  q4
        vld1.8          {d5},  [r12], r2
        vrhadd.u8       q1,  q1,  q6
        vld1.8          {d10}, [r12], r2
        vrhadd.u8       q2,  q2,  q11
        vld1.8          {d11}, [r12], r2
        vrhadd.u8       q5,  q5,  q13

  .ifc \type,avg
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d0,  d0,  d16
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d1,  d1,  d17
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d2,  d2,  d16
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d3,  d3,  d17
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d4,  d4,  d16
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d5,  d5,  d17
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d10, d10, d16
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d11, d11, d17
        sub             r0,  r0,  r3, lsl #3    @ rewind the 8 rows just read
  .endif

        vst1.8          {d0},  [r0,:64], r3
        vst1.8          {d1},  [r0,:64], r3
        vst1.8          {d2},  [r0,:64], r3
        vst1.8          {d3},  [r0,:64], r3
        vst1.8          {d4},  [r0,:64], r3
        vst1.8          {d5},  [r0,:64], r3
        vst1.8          {d10}, [r0,:64], r3
        vst1.8          {d11}, [r0,:64], r3

        bx              lr
endfunc
.endm

        h264_qpel_v_lowpass_l2 put
        h264_qpel_v_lowpass_l2 avg
|
yading@10
|
391
|
yading@10
|
/*
 * put_h264_qpel8_hv_lowpass_neon_top
 *
 * Shared core of the two-dimensional filter for one 8x8 block: a
 * horizontal pass producing 16-bit sums for 13 source rows, followed by a
 * second pass with lowpass_8.16 after the data has been rearranged with
 * swap4 / transpose16_4x4 (both defined in neon.S, not shown here).
 *
 * In:   r1  source pointer, r3 source stride; 13 rows of 16 bytes are
 *           read, leaving r1 advanced by 12 rows
 *       r4  scratch buffer, accessed with 128-bit alignment hints; 192
 *           bytes are written by the first loop.  r4 points at the start
 *           of the buffer again on return.
 * Out:  eight rows of bytes in d12, d13, d14, d15, d8, d9, d10, d11
 *       (the order in which the callers store them)
 * Clobbers r12 and q0-q15.  The filter constants are loaded here.
 */
function put_h264_qpel8_hv_lowpass_neon_top
        lowpass_const   r12
        mov             r12, #12
1:      vld1.8          {d0, d1},  [r1], r3
        vld1.8          {d16,d17}, [r1], r3
        subs            r12, r12, #2
        lowpass_8       d0,  d1,  d16, d17, q11, q12, narrow=0
        vst1.8          {d22-d25}, [r4,:128]!
        bne             1b

        @ 13th row: its sums stay in q12 instead of going to the scratch buffer.
        vld1.8          {d0, d1},  [r1]
        lowpass_8_1     d0,  d1,  q12, narrow=0

        @ Read the 12 stored rows back, last row first, stepping r4 down by 16.
        mov             r12, #-16
        add             r4,  r4,  r12
        vld1.8          {d30,d31}, [r4,:128], r12
        vld1.8          {d20,d21}, [r4,:128], r12
        vld1.8          {d18,d19}, [r4,:128], r12
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d14,d15}, [r4,:128], r12
        vld1.8          {d12,d13}, [r4,:128], r12
        vld1.8          {d10,d11}, [r4,:128], r12
        vld1.8          {d8, d9},  [r4,:128], r12
        vld1.8          {d6, d7},  [r4,:128], r12
        vld1.8          {d4, d5},  [r4,:128], r12
        vld1.8          {d2, d3},  [r4,:128], r12
        vld1.8          {d0, d1},  [r4,:128]

        swap4           d1,  d3,  d5,  d7,  d8,  d10, d12, d14
        transpose16_4x4 q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7

        swap4           d17, d19, d21, d31, d24, d26, d28, d22
        transpose16_4x4 q8,  q9,  q10, q15, q12, q13, q14, q11

        @ Park eight registers in the scratch buffer; they are reloaded in
        @ pairs, in reverse order, for the last four lowpass_8.16 calls.
        vst1.8          {d30,d31}, [r4,:128]!
        vst1.8          {d6, d7},  [r4,:128]!
        vst1.8          {d20,d21}, [r4,:128]!
        vst1.8          {d4, d5},  [r4,:128]!
        vst1.8          {d18,d19}, [r4,:128]!
        vst1.8          {d2, d3},  [r4,:128]!
        vst1.8          {d16,d17}, [r4,:128]!
        vst1.8          {d0, d1},  [r4,:128]

        lowpass_8.16    q4,  q12, d8,  d9,  d24, d25, d8
        lowpass_8.16    q5,  q13, d10, d11, d26, d27, d9
        lowpass_8.16    q6,  q14, d12, d13, d28, d29, d10
        lowpass_8.16    q7,  q11, d14, d15, d22, d23, d11

        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128], r12
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d12
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128], r12
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d13
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128], r12
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d14
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128]
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d15

        transpose_8x8   d12, d13, d14, d15, d8,  d9,  d10, d11

        bx              lr
endfunc
|
yading@10
|
457
|
yading@10
|
/*
 * h264_qpel8_hv_lowpass type
 *
 * Emit the 8x8 two-dimensional filter routine for type = put or avg.
 * The filtering is done by put_h264_qpel8_hv_lowpass_neon_top; this
 * wrapper stores the eight result rows (d12-d15, d8-d11).
 *
 * Register use, as read from the code:
 *   r0   destination pointer (64-bit alignment hint), advanced by 8 rows
 *   r2   destination stride
 *   r1, r3, r4  passed through to the _top routine (source, source
 *        stride, scratch buffer)
 *   r10  holds the return address across the call
 * For type = avg the result is combined with the existing destination
 * bytes using a rounding halving add before it is stored.
 */
.macro  h264_qpel8_hv_lowpass type
function \type\()_h264_qpel8_hv_lowpass_neon
        mov             r10, lr
        bl              put_h264_qpel8_hv_lowpass_neon_top
  .ifc \type,avg
        vld1.8          {d0},      [r0,:64], r2
        vrhadd.u8       d12, d12, d0
        vld1.8          {d1},      [r0,:64], r2
        vrhadd.u8       d13, d13, d1
        vld1.8          {d2},      [r0,:64], r2
        vrhadd.u8       d14, d14, d2
        vld1.8          {d3},      [r0,:64], r2
        vrhadd.u8       d15, d15, d3
        vld1.8          {d4},      [r0,:64], r2
        vrhadd.u8       d8,  d8,  d4
        vld1.8          {d5},      [r0,:64], r2
        vrhadd.u8       d9,  d9,  d5
        vld1.8          {d6},      [r0,:64], r2
        vrhadd.u8       d10, d10, d6
        vld1.8          {d7},      [r0,:64], r2
        vrhadd.u8       d11, d11, d7
        sub             r0,  r0,  r2, lsl #3    @ rewind the 8 rows just read
  .endif

        vst1.8          {d12},     [r0,:64], r2
        vst1.8          {d13},     [r0,:64], r2
        vst1.8          {d14},     [r0,:64], r2
        vst1.8          {d15},     [r0,:64], r2
        vst1.8          {d8},      [r0,:64], r2
        vst1.8          {d9},      [r0,:64], r2
        vst1.8          {d10},     [r0,:64], r2
        vst1.8          {d11},     [r0,:64], r2

        mov             lr,  r10
        bx              lr
endfunc
.endm

        h264_qpel8_hv_lowpass put
        h264_qpel8_hv_lowpass avg
|
yading@10
|
498
|
yading@10
|
/*
 * h264_qpel8_hv_lowpass_l2 type
 *
 * Emit the 8x8 two-dimensional filter routine that additionally averages
 * the result with a second, packed 8x8 source (type = put or avg).
 *
 * Register use, as read from the code:
 *   r0   destination pointer (64-bit alignment hint), advanced by 8 rows
 *   r2   second source: 64 contiguous bytes read with 128-bit alignment
 *        hints; advanced by 64
 *   r3   stride, used for the source inside the _top routine and for the
 *        destination here
 *   r1, r4  passed through to the _top routine (source, scratch buffer)
 *   r10  holds the return address across the call
 * For type = avg the result is further combined with the existing
 * destination bytes using a rounding halving add.
 */
.macro  h264_qpel8_hv_lowpass_l2 type
function \type\()_h264_qpel8_hv_lowpass_l2_neon
        mov             r10, lr
        bl              put_h264_qpel8_hv_lowpass_neon_top

        @ Filter output rows are in q6, q7, q4, q5 (two rows per register).
        vld1.8          {d0, d1},  [r2,:128]!
        vld1.8          {d2, d3},  [r2,:128]!
        vrhadd.u8       q0,  q0,  q6
        vld1.8          {d4, d5},  [r2,:128]!
        vrhadd.u8       q1,  q1,  q7
        vld1.8          {d6, d7},  [r2,:128]!
        vrhadd.u8       q2,  q2,  q4
        vrhadd.u8       q3,  q3,  q5
  .ifc \type,avg
        vld1.8          {d16},     [r0,:64], r3
        vrhadd.u8       d0,  d0,  d16
        vld1.8          {d17},     [r0,:64], r3
        vrhadd.u8       d1,  d1,  d17
        vld1.8          {d18},     [r0,:64], r3
        vrhadd.u8       d2,  d2,  d18
        vld1.8          {d19},     [r0,:64], r3
        vrhadd.u8       d3,  d3,  d19
        vld1.8          {d20},     [r0,:64], r3
        vrhadd.u8       d4,  d4,  d20
        vld1.8          {d21},     [r0,:64], r3
        vrhadd.u8       d5,  d5,  d21
        vld1.8          {d22},     [r0,:64], r3
        vrhadd.u8       d6,  d6,  d22
        vld1.8          {d23},     [r0,:64], r3
        vrhadd.u8       d7,  d7,  d23
        sub             r0,  r0,  r3, lsl #3    @ rewind the 8 rows just read
  .endif
        vst1.8          {d0},      [r0,:64], r3
        vst1.8          {d1},      [r0,:64], r3
        vst1.8          {d2},      [r0,:64], r3
        vst1.8          {d3},      [r0,:64], r3
        vst1.8          {d4},      [r0,:64], r3
        vst1.8          {d5},      [r0,:64], r3
        vst1.8          {d6},      [r0,:64], r3
        vst1.8          {d7},      [r0,:64], r3

        mov             lr,  r10
        bx              lr
endfunc
.endm

        h264_qpel8_hv_lowpass_l2 put
        h264_qpel8_hv_lowpass_l2 avg
|
yading@10
|
547
|
yading@10
|
/*
 * h264_qpel16_hv type
 *
 * Emit the 16x16 two-dimensional filter routines (plain and _l2) for
 * type = put or avg.  Each one runs the matching 8x8 routine four times:
 * left-top, left-bottom, right-top, right-bottom.  The last call is a
 * tail call.
 *
 * Register use, as read from the code:
 *   r0  destination pointer
 *   r1  source pointer; every 8x8 call leaves it advanced by 12 rows,
 *       hence the 4-row corrections
 *   r3  source stride (and destination stride in the _l2 variant)
 *   r2  destination stride (plain variant); in the _l2 variant it is set
 *       to r4 - 256 and consumed as the packed second source
 *   r4  scratch buffer for the 8x8 routines
 *   r9  holds the return address (r10 is used by the 8x8 routines)
 */
.macro  h264_qpel16_hv  type
function \type\()_h264_qpel16_hv_lowpass_neon
        mov             r9,  lr
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-left block
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             r1,  r1,  r3, lsl #4    @ source: back to the first row,
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ right-hand 8 columns
        sub             r0,  r0,  r2, lsl #4    @ destination: back 16 rows,
        add             r0,  r0,  #8            @ right-hand 8 columns
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-right block
        mov             lr,  r9
        b               \type\()_h264_qpel8_hv_lowpass_neon
endfunc

function \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             r9,  lr
        sub             r2,  r4,  #256          @ second source sits 256 bytes below r4
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-left block
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #4    @ source: back to the first row,
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ right-hand 8 columns
        sub             r0,  r0,  r3, lsl #4    @ destination: back 16 rows,
        add             r0,  r0,  #8            @ right-hand 8 columns
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #2    @ net +8 rows: lower-right block
        mov             lr,  r9
        b               \type\()_h264_qpel8_hv_lowpass_l2_neon
endfunc
.endm

        h264_qpel16_hv put
        h264_qpel16_hv avg
|
yading@10
|
585
|
yading@10
|
/*
 * h264_qpel8 type
 *
 * Emit the exported 8x8 entry points ff_<type>_h264_qpel8_mcXY_neon for
 * type = put or avg.
 *
 * On entry to every function (as used by the code): r0 = destination,
 * r1 = source, r2 = stride.  Each entry point sets up the pointers and
 * strides expected by the internal filter routines and calls or
 * tail-calls them.
 *
 * Common patterns:
 *  - Entry points that call a routine touching d8-d15 wrap the call in
 *    vpush/vpop {d8-d15}.
 *  - Entry points needing scratch memory keep the original sp in r11,
 *    align sp down to 16 bytes and reserve the buffer below it.  The A
 *    and T line prefixes come from asm.S and presumably select the ARM
 *    and Thumb variants of the alignment sequence.
 *  - r0/r1 are pushed first so they can be reloaded with
 *    "ldrd r0, r1, [r11], #8"; the post-increment also steps r11 past
 *    them, so the final pop only restores the remaining registers.
 *  - Several entry points adjust r1 (and the pushed copy of r1) and then
 *    branch to the local label inside a sibling function, after pushing
 *    exactly the same register list as that sibling.
 */
.macro  h264_qpel8      type
function ff_\type\()_h264_qpel8_mc10_neon, export=1
        lowpass_const   r3
        mov             r3,  r1                 @ second source: the unfiltered pixels
        sub             r1,  r1,  #2            @ filter window starts 2 columns left
        mov             r12, #8
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc

function ff_\type\()_h264_qpel8_mc20_neon, export=1
        lowpass_const   r3
        sub             r1,  r1,  #2
        mov             r3,  r2                 @ destination stride = stride
        mov             r12, #8
        b               \type\()_h264_qpel8_h_lowpass_neon
endfunc

function ff_\type\()_h264_qpel8_mc30_neon, export=1
        lowpass_const   r3
        add             r3,  r1,  #1            @ second source: pixels one column right
        sub             r1,  r1,  #2
        mov             r12, #8
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc

function ff_\type\()_h264_qpel8_mc01_neon, export=1
        push            {lr}
        mov             r12, r1                 @ second source: the unfiltered pixels
\type\()_h264_qpel8_mc01:
        lowpass_const   r3
        mov             r3,  r2
        sub             r1,  r1,  r2, lsl #1    @ filter window starts 2 rows up
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        vpop            {d8-d15}
        pop             {pc}
endfunc

function ff_\type\()_h264_qpel8_mc11_neon, export=1
        push            {r0, r1, r11, lr}
\type\()_h264_qpel8_mc11:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #64           @ 8 rows of 8 bytes
        mov             r0,  sp
        sub             r1,  r1,  #2
        mov             r3,  #8
        mov             r12, #8
        vpush           {d8-d15}
        bl              put_h264_qpel8_h_lowpass_neon
        ldrd            r0,  r1,  [r11], #8     @ reload the pushed r0/r1
        mov             r3,  r2
        add             r12, sp,  #64           @ scratch buffer sits above the vpush area
        sub             r1,  r1,  r2, lsl #1
        mov             r2,  #8                 @ stride of the scratch buffer
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r11, pc}
endfunc

function ff_\type\()_h264_qpel8_mc21_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
\type\()_h264_qpel8_mc21:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(8*8+16*12)  @ 8x8 bytes + 12 rows of 16 bytes
        sub             r1,  r1,  #2
        mov             r3,  #8
        mov             r0,  sp
        mov             r12, #8
        vpush           {d8-d15}
        bl              put_h264_qpel8_h_lowpass_neon
        mov             r4,  r0                 @ r0 now points just past the 64 bytes written
        ldrd            r0,  r1,  [r11], #8     @ reload the pushed r0/r1
        sub             r1,  r1,  r2, lsl #1
        sub             r1,  r1,  #2
        mov             r3,  r2
        sub             r2,  r4,  #64           @ second source: start of those 64 bytes
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

function ff_\type\()_h264_qpel8_mc31_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r11, lr}       @ pushed source is one column right
        sub             r1,  r1,  #1
        b               \type\()_h264_qpel8_mc11
endfunc

function ff_\type\()_h264_qpel8_mc02_neon, export=1
        push            {lr}
        lowpass_const   r3
        sub             r1,  r1,  r2, lsl #1
        mov             r3,  r2
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {pc}
endfunc

function ff_\type\()_h264_qpel8_mc12_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
\type\()_h264_qpel8_mc12:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(8*8+16*12)  @ 8x8 bytes + 12 rows of 16 bytes
        sub             r1,  r1,  r2, lsl #1
        mov             r3,  r2
        mov             r2,  #8
        mov             r0,  sp
        vpush           {d8-d15}
        bl              put_h264_qpel8_v_lowpass_neon
        mov             r4,  r0                 @ r0 now points just past the 64 bytes written
        ldrd            r0,  r1,  [r11], #8     @ reload the pushed r0/r1
        sub             r1,  r1,  r3, lsl #1
        sub             r1,  r1,  #2
        sub             r2,  r4,  #64           @ second source: start of those 64 bytes
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

function ff_\type\()_h264_qpel8_mc22_neon, export=1
        push            {r4, r10, r11, lr}
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r4,  r11, #15
T       mov             sp,  r4
        sub             r1,  r1,  r2, lsl #1
        sub             r1,  r1,  #2
        mov             r3,  r2
        sub             sp,  sp,  #(16*12)      @ scratch: 12 rows of 16 bytes
        mov             r4,  sp
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

function ff_\type\()_h264_qpel8_mc32_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
        add             r1,  r1,  #1            @ first pass one column right
        b               \type\()_h264_qpel8_mc12
endfunc

function ff_\type\()_h264_qpel8_mc03_neon, export=1
        push            {lr}
        add             r12, r1,  r2            @ second source: pixels one row down
        b               \type\()_h264_qpel8_mc01
endfunc

function ff_\type\()_h264_qpel8_mc13_neon, export=1
        push            {r0, r1, r11, lr}
        add             r1,  r1,  r2            @ first pass one row down
        b               \type\()_h264_qpel8_mc11
endfunc

function ff_\type\()_h264_qpel8_mc23_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
        add             r1,  r1,  r2            @ first pass one row down
        b               \type\()_h264_qpel8_mc21
endfunc

function ff_\type\()_h264_qpel8_mc33_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r11, lr}       @ pushed source is one column right
        add             r1,  r1,  r2
        sub             r1,  r1,  #1            @ first pass one row down, original column
        b               \type\()_h264_qpel8_mc11
endfunc
.endm

        h264_qpel8 put
        h264_qpel8 avg
|
yading@10
|
774
|
yading@10
|
775 .macro h264_qpel16 type
|
yading@10
|
@ 16x16: horizontal filter averaged with the unfiltered source pixels.
@ In: r0 = destination, r1 = source, r2 = stride.
function ff_\type\()_h264_qpel16_mc10_neon, export=1
        lowpass_const   r3
        mov             r3,  r1                 @ second source: the unfiltered pixels
        sub             r1,  r1,  #2            @ filter window starts 2 columns left
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc
|
yading@10
|
782
|
yading@10
|
@ 16x16: horizontal filter only.
@ In: r0 = destination, r1 = source, r2 = stride.
function ff_\type\()_h264_qpel16_mc20_neon, export=1
        lowpass_const   r3
        sub             r1,  r1,  #2            @ filter window starts 2 columns left
        mov             r3,  r2                 @ destination stride = stride
        b               \type\()_h264_qpel16_h_lowpass_neon
endfunc
|
yading@10
|
789
|
yading@10
|
@ 16x16: horizontal filter averaged with the source pixels one column right.
@ In: r0 = destination, r1 = source, r2 = stride.
function ff_\type\()_h264_qpel16_mc30_neon, export=1
        lowpass_const   r3
        add             r3,  r1,  #1            @ second source: pixels one column right
        sub             r1,  r1,  #2            @ filter window starts 2 columns left
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc
|
yading@10
|
796
|
yading@10
|
@ 16x16: vertical filter averaged with the unfiltered source pixels.
@ In: r0 = destination, r1 = source, r2 = stride.
@ r4 is saved because the qpel16 vertical routine keeps its return
@ address there.  The local label is a secondary entry for callers that
@ have already pushed {r4, lr} and set r12 themselves.
function ff_\type\()_h264_qpel16_mc01_neon, export=1
        push            {r4, lr}
        mov             r12, r1                 @ second source: the unfiltered pixels
\type\()_h264_qpel16_mc01:
        lowpass_const   r3
        mov             r3,  r2
        sub             r1,  r1,  r2, lsl #1    @ filter window starts 2 rows up
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
|
yading@10
|
809
|
yading@10
|
@ 16x16: horizontal filter into a 256-byte stack buffer, then vertical
@ filter averaged with that buffer.
@ In: r0 = destination, r1 = source, r2 = stride.
@ The original sp is kept in r11 while sp is aligned down to 16 bytes.
@ The A and T line prefixes come from asm.S and presumably select the ARM
@ and Thumb variants of the alignment sequence.  The local label is a
@ secondary entry for callers that have already pushed the same registers.
function ff_\type\()_h264_qpel16_mc11_neon, export=1
        push            {r0, r1, r4, r11, lr}
\type\()_h264_qpel16_mc11:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #256          @ 16 rows of 16 bytes
        mov             r0,  sp
        sub             r1,  r1,  #2
        mov             r3,  #16                @ stride of the stack buffer
        vpush           {d8-d15}
        bl              put_h264_qpel16_h_lowpass_neon
        ldrd            r0,  r1,  [r11], #8     @ reload the pushed r0/r1, step r11 past them
        mov             r3,  r2
        add             r12, sp,  #64           @ stack buffer sits above the vpush area
        sub             r1,  r1,  r2, lsl #1
        mov             r2,  #16
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r11, pc}
endfunc
|
yading@10
|
834
|
yading@10
|
835 function ff_\type\()_h264_qpel16_mc21_neon, export=1
|
yading@10
|
836 push {r0, r1, r4-r5, r9-r11, lr}
|
yading@10
|
837 \type\()_h264_qpel16_mc21:
|
yading@10
|
838 lowpass_const r3
|
yading@10
|
839 mov r11, sp
|
yading@10
|
840 A bic sp, sp, #15
|
yading@10
|
841 T bic r0, r11, #15
|
yading@10
|
842 T mov sp, r0
|
yading@10
|
843 sub sp, sp, #(16*16+16*12)
|
yading@10
|
844 sub r1, r1, #2
|
yading@10
|
845 mov r0, sp
|
yading@10
|
846 vpush {d8-d15}
|
yading@10
|
847 bl put_h264_qpel16_h_lowpass_neon_packed
|
yading@10
|
848 mov r4, r0
|
yading@10
|
849 ldrd r0, r1, [r11], #8
|
yading@10
|
850 sub r1, r1, r2, lsl #1
|
yading@10
|
851 sub r1, r1, #2
|
yading@10
|
852 mov r3, r2
|
yading@10
|
853 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
|
yading@10
|
854 vpop {d8-d15}
|
yading@10
|
855 mov sp, r11
|
yading@10
|
856 pop {r4-r5, r9-r11, pc}
|
yading@10
|
857 endfunc
|
yading@10
|
858
|
yading@10
|
859 function ff_\type\()_h264_qpel16_mc31_neon, export=1
|
yading@10
|
860 add r1, r1, #1
|
yading@10
|
861 push {r0, r1, r4, r11, lr}
|
yading@10
|
862 sub r1, r1, #1
|
yading@10
|
863 b \type\()_h264_qpel16_mc11
|
yading@10
|
864 endfunc
|
yading@10
|
865
|
yading@10
|
866 function ff_\type\()_h264_qpel16_mc02_neon, export=1
|
yading@10
|
867 push {r4, lr}
|
yading@10
|
868 lowpass_const r3
|
yading@10
|
869 sub r1, r1, r2, lsl #1
|
yading@10
|
870 mov r3, r2
|
yading@10
|
871 vpush {d8-d15}
|
yading@10
|
872 bl \type\()_h264_qpel16_v_lowpass_neon
|
yading@10
|
873 vpop {d8-d15}
|
yading@10
|
874 pop {r4, pc}
|
yading@10
|
875 endfunc
|
yading@10
|
876
|
yading@10
|
877 function ff_\type\()_h264_qpel16_mc12_neon, export=1
|
yading@10
|
878 push {r0, r1, r4-r5, r9-r11, lr}
|
yading@10
|
879 \type\()_h264_qpel16_mc12:
|
yading@10
|
880 lowpass_const r3
|
yading@10
|
881 mov r11, sp
|
yading@10
|
882 A bic sp, sp, #15
|
yading@10
|
883 T bic r0, r11, #15
|
yading@10
|
884 T mov sp, r0
|
yading@10
|
885 sub sp, sp, #(16*16+16*12)
|
yading@10
|
886 sub r1, r1, r2, lsl #1
|
yading@10
|
887 mov r0, sp
|
yading@10
|
888 mov r3, r2
|
yading@10
|
889 vpush {d8-d15}
|
yading@10
|
890 bl put_h264_qpel16_v_lowpass_neon_packed
|
yading@10
|
891 mov r4, r0
|
yading@10
|
892 ldrd r0, r1, [r11], #8
|
yading@10
|
893 sub r1, r1, r3, lsl #1
|
yading@10
|
894 sub r1, r1, #2
|
yading@10
|
895 mov r2, r3
|
yading@10
|
896 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
|
yading@10
|
897 vpop {d8-d15}
|
yading@10
|
898 mov sp, r11
|
yading@10
|
899 pop {r4-r5, r9-r11, pc}
|
yading@10
|
900 endfunc
|
yading@10
|
901
|
yading@10
|
902 function ff_\type\()_h264_qpel16_mc22_neon, export=1
|
yading@10
|
903 push {r4, r9-r11, lr}
|
yading@10
|
904 lowpass_const r3
|
yading@10
|
905 mov r11, sp
|
yading@10
|
906 A bic sp, sp, #15
|
yading@10
|
907 T bic r4, r11, #15
|
yading@10
|
908 T mov sp, r4
|
yading@10
|
909 sub r1, r1, r2, lsl #1
|
yading@10
|
910 sub r1, r1, #2
|
yading@10
|
911 mov r3, r2
|
yading@10
|
912 sub sp, sp, #(16*12)
|
yading@10
|
913 mov r4, sp
|
yading@10
|
914 vpush {d8-d15}
|
yading@10
|
915 bl \type\()_h264_qpel16_hv_lowpass_neon
|
yading@10
|
916 vpop {d8-d15}
|
yading@10
|
917 mov sp, r11
|
yading@10
|
918 pop {r4, r9-r11, pc}
|
yading@10
|
919 endfunc
|
yading@10
|
920
|
yading@10
|
921 function ff_\type\()_h264_qpel16_mc32_neon, export=1
|
yading@10
|
922 push {r0, r1, r4-r5, r9-r11, lr}
|
yading@10
|
923 add r1, r1, #1
|
yading@10
|
924 b \type\()_h264_qpel16_mc12
|
yading@10
|
925 endfunc
|
yading@10
|
926
|
yading@10
|
927 function ff_\type\()_h264_qpel16_mc03_neon, export=1
|
yading@10
|
928 push {r4, lr}
|
yading@10
|
929 add r12, r1, r2
|
yading@10
|
930 b \type\()_h264_qpel16_mc01
|
yading@10
|
931 endfunc
|
yading@10
|
932
|
yading@10
|
933 function ff_\type\()_h264_qpel16_mc13_neon, export=1
|
yading@10
|
934 push {r0, r1, r4, r11, lr}
|
yading@10
|
935 add r1, r1, r2
|
yading@10
|
936 b \type\()_h264_qpel16_mc11
|
yading@10
|
937 endfunc
|
yading@10
|
938
|
yading@10
|
939 function ff_\type\()_h264_qpel16_mc23_neon, export=1
|
yading@10
|
940 push {r0, r1, r4-r5, r9-r11, lr}
|
yading@10
|
941 add r1, r1, r2
|
yading@10
|
942 b \type\()_h264_qpel16_mc21
|
yading@10
|
943 endfunc
|
yading@10
|
944
|
yading@10
|
945 function ff_\type\()_h264_qpel16_mc33_neon, export=1
|
yading@10
|
946 add r1, r1, #1
|
yading@10
|
947 push {r0, r1, r4, r11, lr}
|
yading@10
|
948 add r1, r1, r2
|
yading@10
|
949 sub r1, r1, #1
|
yading@10
|
950 b \type\()_h264_qpel16_mc11
|
yading@10
|
951 endfunc
|
yading@10
|
952 .endm
|
yading@10
|
953
|
yading@10
|
/* Expand the h264_qpel16 macro twice: once with type = put, once with type = avg. */
        h264_qpel16 put
        h264_qpel16 avg
|