/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * Fixed-point IDCT coefficients, scaled by 1 << 14.
 * Note: for i = 4 the formula quoted below evaluates to 16384; the value
 * used here is 16383, which the code materialises as 16384 - 1.
 */
#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11    /* right shift applied to the row-pass results */
#define COL_SHIFT 20    /* right shift applied to the column-pass results */

/*
 * Two coefficients packed into one 32-bit word: the first in the low
 * halfword, the second in the high halfword.  The code picks the half it
 * needs with the b/t selectors of the smulXY / smlaXY instructions.
 */
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

/*
 * idct_row_armv5te: in-place 1-D IDCT of one row of eight 16-bit values.
 *
 * In:    a1 = address of the row (row[0..7], read with two ldrd)
 * Out:   row[0..7] at [a1] overwritten with the results (sums shifted
 *        right by 11 = ROW_SHIFT, or row[0] << 3 in the DC-only case)
 * Clobb: a2, a3, a4, v1-v7, fp, ip, lr, flags; a1 is not modified
 * Stack: 4 bytes (return address)
 *
 * v1-v7/fp are used without being saved here; the callers in this file
 * save them once in their own prologue.
 * NOTE(review): ldrd/strd are used on [a1] and [a1, #8], so the row is
 * presumably expected to be doubleword aligned -- confirm with callers.
 */
function idct_row_armv5te
        str       lr, [sp, #-4]!

        ldrd      v1, v2, [a1, #8]      /* v1 = row[5:4], v2 = row[7:6] */
        ldrd      a3, a4, [a1]          /* a3 = row[1:0], a4 = row[3:2] */
        orrs      v1, v1, v2            /* Z (and v1 == 0) iff row[4..7] == 0 */
        itt       eq
        cmpeq     v1, a4                /* ... and row[2] == row[3] == 0 */
        cmpeq     v1, a3, lsr #16       /* ... and row[1] == 0 */
        beq       row_dc_only

        /* Even part from row[0] and row[2]: v1 = a0, v2 = a1, v3 = a2, v4 = a3 */
        mov       v1, #(1<<(ROW_SHIFT-1))
        mov       ip, #16384
        sub       ip, ip, #1            /* ip = W4 */
        smlabb    v1, ip, a3, v1        /* v1 = W4*row[0]+(1<<(RS-1)) */
        ldr       ip, =W26              /* ip = W2 | (W6 << 16) */
        smultb    a2, ip, a4            /* a2 = W6*row[2] */
        smulbb    lr, ip, a4            /* lr = W2*row[2] */
        add       v2, v1, a2
        sub       v3, v1, a2
        sub       v4, v1, lr
        add       v1, v1, lr

        /*
         * Odd part from row[1] and row[3]:
         * v5 = b0, v6 = -b1 (kept negated), v7 = b2, fp = b3
         */
        ldr       ip, =W13              /* ip = W1 | (W3 << 16) */
        ldr       lr, =W57              /* lr = W5 | (W7 << 16) */
        smulbt    v5, ip, a3            /* v5 = W1*row[1] */
        smultt    v6, lr, a4            /* v6 = W7*row[3] */
        smlatt    v5, ip, a4, v5        /* v5 += W3*row[3] */
        smultt    a2, ip, a3            /* a2 = W3*row[1] */
        smulbt    v7, lr, a3            /* v7 = W5*row[1] */
        sub       v6, v6, a2            /* v6 = W7*row[3] - W3*row[1] */
        smulbt    a2, ip, a4            /* a2 = W1*row[3] */
        smultt    fp, lr, a3            /* fp = W7*row[1] */
        sub       v7, v7, a2            /* v7 = W5*row[1] - W1*row[3] */
        smulbt    a2, lr, a4            /* a2 = W5*row[3] */
        ldrd      a3, a4, [a1, #8]      /* a3=row[5:4] a4=row[7:6] */
        sub       fp, fp, a2            /* fp = W7*row[1] - W5*row[3] */

        orrs      a2, a3, a4
        beq       1f                    /* row[4..7] all zero: skip their terms */

        /* Odd part, contributions of row[5] and row[7] */
        smlabt    v5, lr, a3, v5        /* v5 += W5*row[5] */
        smlabt    v6, ip, a3, v6        /* v6 += W1*row[5] */
        smlatt    v5, lr, a4, v5        /* v5 += W7*row[7] */
        smlabt    v6, lr, a4, v6        /* v6 += W5*row[7] */
        smlatt    v7, lr, a3, v7        /* v7 += W7*row[5] */
        smlatt    fp, ip, a3, fp        /* fp += W3*row[5] */
        smulbt    a2, ip, a4            /* a2 = W1*row[7] */
        smlatt    v7, ip, a4, v7        /* v7 += W3*row[7] */
        sub       fp, fp, a2            /* fp -= W1*row[7] */

        /* Even part, contributions of row[4] and row[6] */
        ldr       ip, =W26              /* ip = W2 | (W6 << 16) */
        mov       a2, #16384
        sub       a2, a2, #1            /* a2 =  W4 */
        smulbb    a2, a2, a3            /* a2 =  W4*row[4] */
        smultb    lr, ip, a4            /* lr =  W6*row[6] */
        add       v1, v1, a2            /* v1 += W4*row[4] */
        add       v1, v1, lr            /* v1 += W6*row[6] */
        add       v4, v4, a2            /* v4 += W4*row[4] */
        sub       v4, v4, lr            /* v4 -= W6*row[6] */
        smulbb    lr, ip, a4            /* lr =  W2*row[6] */
        sub       v2, v2, a2            /* v2 -= W4*row[4] */
        sub       v2, v2, lr            /* v2 -= W2*row[6] */
        sub       v3, v3, a2            /* v3 -= W4*row[4] */
        add       v3, v3, lr            /* v3 += W2*row[6] */

        /*
         * Butterfly and pack.  Each result is shifted right by 11
         * (ROW_SHIFT); two 16-bit results go into one word.  The logical
         * shift leaves bits 16-20 of the low result set, so they are
         * cleared (0x1f0000) before the high halfword is added in.
         */
1:      add       a2, v1, v5            /* row[0] = (a0 + b0) >> 11 */
        mov       a3, a2, lsr #11
        bic       a3, a3, #0x1f0000
        sub       a2, v2, v6            /* row[1] = (a1 + b1) >> 11 */
        mov       a2, a2, lsr #11
        add       a3, a3, a2, lsl #16
        add       a2, v3, v7            /* row[2] = (a2 + b2) >> 11 */
        mov       a4, a2, lsr #11
        bic       a4, a4, #0x1f0000
        add       a2, v4, fp            /* row[3] = (a3 + b3) >> 11 */
        mov       a2, a2, lsr #11
        add       a4, a4, a2, lsl #16
        strd      a3, a4, [a1]

        sub       a2, v4, fp            /* row[4] = (a3 - b3) >> 11 */
        mov       a3, a2, lsr #11
        bic       a3, a3, #0x1f0000
        sub       a2, v3, v7            /* row[5] = (a2 - b2) >> 11 */
        mov       a2, a2, lsr #11
        add       a3, a3, a2, lsl #16
        add       a2, v2, v6            /* row[6] = (a1 - b1) >> 11 */
        mov       a4, a2, lsr #11
        bic       a4, a4, #0x1f0000
        sub       a2, v1, v5            /* row[7] = (a0 - b0) >> 11 */
        mov       a2, a2, lsr #11
        add       a4, a4, a2, lsl #16
        strd      a3, a4, [a1, #8]

        ldr       pc, [sp], #4

        /*
         * Only row[0] is non-zero (a3 = row[0] in the low halfword, high
         * halfword zero).  Every output becomes row[0] << 3.
         */
row_dc_only:
        orr       a3, a3, a3, lsl #16   /* duplicate row[0] into both halves */
        bic       a3, a3, #0xe000       /* drop bits 13-15 so the shift below
                                           cannot spill into the high half */
        mov       a3, a3, lsl #3
        mov       a4, a3
        strd      a3, a4, [a1]
        strd      a3, a4, [a1, #8]

        ldr       pc, [sp], #4
endfunc

/*
 * idct_col: shared body of the column pass.  Processes two adjacent
 * columns at once: the low and the high halfword of each 32-bit word
 * loaded from [a1 + 16*row].
 *
 * In:    a1 = address of the row-0 word of the column pair
 * Out:   on the stack (32 bytes pushed by the stmfd below), the even part
 *        as (low column, high column) pairs, lowest address first:
 *            a0, a0', a1, a1', a2, a2', a3, a3'
 *        in registers, the odd part:
 *            v1/v2 = b0,  v3/v4 = -b1 (kept negated),
 *            v5/v6 = b2,  v7/fp = b3
 *        The user of the macro pops the even part and forms
 *        a0 +/- b0, a1 -/+ (-b1), a2 +/- b2, a3 +/- b3.
 * Clobb: a2, a3, a4, ip, lr, v1-v7, fp; a1 is not modified
 */
        .macro idct_col
        ldr       a4, [a1]              /* a4 = col[1:0] */
        mov       ip, #16384
        sub       ip, ip, #1            /* ip = W4 */
#if 0
        mov       v1, #(1<<(COL_SHIFT-1))
        smlabt    v2, ip, a4, v1        /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb    v1, ip, a4, v1        /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr       a4, [a1, #(16*4)]
#else
        /*
         * Add the rounding term before the multiply: (col + round/W4) * W4,
         * with the multiply by W4 = 16383 done as (x << 14) - x.
         */
        mov       v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add       v2, v1, a4, asr #16   /* high column + rounding */
        rsb       v2, v2, v2, lsl #14   /* v2 *= 16383 */
        mov       a4, a4, lsl #16
        add       v1, v1, a4, asr #16   /* low column (sign-extended) + rounding */
        ldr       a4, [a1, #(16*4)]
        rsb       v1, v1, v1, lsl #14   /* v1 *= 16383 */
#endif

        /* Row 4 (W4): a0 += t, a1 -= t, a2 -= t, a3 += t */
        smulbb    lr, ip, a4
        smulbt    a3, ip, a4
        sub       v3, v1, lr
        sub       v5, v1, lr
        add       v7, v1, lr
        add       v1, v1, lr
        sub       v4, v2, a3
        sub       v6, v2, a3
        add       fp, v2, a3
        ldr       ip, =W26
        ldr       a4, [a1, #(16*2)]
        add       v2, v2, a3

        /* Row 2: a0 += W2*c, a3 -= W2*c, a1 += W6*c, a2 -= W6*c */
        smulbb    lr, ip, a4
        smultb    a3, ip, a4
        add       v1, v1, lr
        sub       v7, v7, lr
        add       v3, v3, a3
        sub       v5, v5, a3
        smulbt    lr, ip, a4
        smultt    a3, ip, a4
        add       v2, v2, lr
        sub       fp, fp, lr
        add       v4, v4, a3
        ldr       a4, [a1, #(16*6)]
        sub       v6, v6, a3

        /* Row 6: a0 += W6*c, a3 -= W6*c, a1 -= W2*c, a2 += W2*c */
        smultb    lr, ip, a4
        smulbb    a3, ip, a4
        add       v1, v1, lr
        sub       v7, v7, lr
        sub       v3, v3, a3
        add       v5, v5, a3
        smultt    lr, ip, a4
        smulbt    a3, ip, a4
        add       v2, v2, lr
        sub       fp, fp, lr
        sub       v4, v4, a3
        add       v6, v6, a3

        /* Park the even part; the registers are reused for the odd part */
        stmfd     sp!, {v1, v2, v3, v4, v5, v6, v7, fp}

        /* Row 1: b0 = W1*c, -b1 = -(W3*c), b2 = W5*c, b3 = W7*c */
        ldr       ip, =W13
        ldr       a4, [a1, #(16*1)]
        ldr       lr, =W57
        smulbb    v1, ip, a4
        smultb    v3, ip, a4
        smulbb    v5, lr, a4
        smultb    v7, lr, a4
        smulbt    v2, ip, a4
        smultt    v4, ip, a4
        smulbt    v6, lr, a4
        smultt    fp, lr, a4
        rsb       v4, v4, #0            /* negate: v3/v4 accumulate -b1 */
        ldr       a4, [a1, #(16*3)]
        rsb       v3, v3, #0

        /* Row 3: b0 += W3*c, -b1 += W7*c, b2 -= W1*c, b3 -= W5*c */
        smlatb    v1, ip, a4, v1
        smlatb    v3, lr, a4, v3
        smulbb    a3, ip, a4
        smulbb    a2, lr, a4
        sub       v5, v5, a3
        sub       v7, v7, a2
        smlatt    v2, ip, a4, v2
        smlatt    v4, lr, a4, v4
        smulbt    a3, ip, a4
        smulbt    a2, lr, a4
        sub       v6, v6, a3
        ldr       a4, [a1, #(16*5)]
        sub       fp, fp, a2

        /* Row 5: b0 += W5*c, -b1 += W1*c, b2 += W7*c, b3 += W3*c */
        smlabb    v1, lr, a4, v1
        smlabb    v3, ip, a4, v3
        smlatb    v5, lr, a4, v5
        smlatb    v7, ip, a4, v7
        smlabt    v2, lr, a4, v2
        smlabt    v4, ip, a4, v4
        smlatt    v6, lr, a4, v6
        ldr       a3, [a1, #(16*7)]
        smlatt    fp, ip, a4, fp

        /* Row 7: b0 += W7*c, -b1 += W5*c, b2 += W3*c, b3 -= W1*c */
        smlatb    v1, lr, a3, v1
        smlabb    v3, lr, a3, v3
        smlatb    v5, ip, a3, v5
        smulbb    a4, ip, a3
        smlatt    v2, lr, a3, v2
        sub       v7, v7, a4
        smlabt    v4, lr, a3, v4
        smulbt    a4, ip, a3
        smlatt    v6, ip, a3, v6
        sub       fp, fp, a4
        .endm

/*
 * idct_col_armv5te: in-place column pass for two adjacent columns.
 *
 * In:    a1 = address of the row-0 word of the column pair
 * Out:   the eight words at [a1 + 16*k], k = 0..7, are overwritten; each
 *        holds the two 16-bit results (sum >> 20 = COL_SHIFT) for row k
 * Clobb: a2, a3, a4, ip, lr, v1-v7, fp, flags; a1 is not modified
 * Stack: 4 bytes (return address) + 32 bytes used by idct_col, all
 *        released before returning
 *
 * Packing: the low result is shifted logically, leaving a 12-bit field;
 * if the sum was negative (mi) bits 12-15 are set to restore the sign
 * within the halfword.  The high result is shifted arithmetically and
 * inserted at bit 16.
 */
function idct_col_armv5te
        str       lr, [sp, #-4]!

        idct_col

        ldmfd     sp!, {a3, a4}         /* a3/a4 = a0 (low/high column) */
        adds      a2, a3, v1            /* row 0 = a0 + b0 */
        mov       a2, a2, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        add       ip, a4, v2
        mov       ip, ip, asr #20
        orr       a2, a2, ip, lsl #16
        str       a2, [a1]
        subs      a3, a3, v1            /* row 7 = a0 - b0 */
        mov       a2, a3, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        sub       a4, a4, v2
        mov       a4, a4, asr #20
        orr       a2, a2, a4, lsl #16
        ldmfd     sp!, {a3, a4}         /* a3/a4 = a1 */
        str       a2, [a1, #(16*7)]

        subs      a2, a3, v3            /* row 1 = a1 + b1 (v3/v4 hold -b1) */
        mov       a2, a2, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        sub       ip, a4, v4
        mov       ip, ip, asr #20
        orr       a2, a2, ip, lsl #16
        str       a2, [a1, #(16*1)]
        adds      a3, a3, v3            /* row 6 = a1 - b1 */
        mov       a2, a3, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        add       a4, a4, v4
        mov       a4, a4, asr #20
        orr       a2, a2, a4, lsl #16
        ldmfd     sp!, {a3, a4}         /* a3/a4 = a2 */
        str       a2, [a1, #(16*6)]

        adds      a2, a3, v5            /* row 2 = a2 + b2 */
        mov       a2, a2, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        add       ip, a4, v6
        mov       ip, ip, asr #20
        orr       a2, a2, ip, lsl #16
        str       a2, [a1, #(16*2)]
        subs      a3, a3, v5            /* row 5 = a2 - b2 */
        mov       a2, a3, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        sub       a4, a4, v6
        mov       a4, a4, asr #20
        orr       a2, a2, a4, lsl #16
        ldmfd     sp!, {a3, a4}         /* a3/a4 = a3 */
        str       a2, [a1, #(16*5)]

        adds      a2, a3, v7            /* row 3 = a3 + b3 */
        mov       a2, a2, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        add       ip, a4, fp
        mov       ip, ip, asr #20
        orr       a2, a2, ip, lsl #16
        str       a2, [a1, #(16*3)]
        subs      a3, a3, v7            /* row 4 = a3 - b3 */
        mov       a2, a3, lsr #20
        it        mi
        orrmi     a2, a2, #0xf000
        sub       a4, a4, fp
        mov       a4, a4, asr #20
        orr       a2, a2, a4, lsl #16
        str       a2, [a1, #(16*4)]

        ldr       pc, [sp], #4
endfunc

/*
 * clip dst, src...: dst = src clamped to the range 0..255.
 * src is everything after the first comma and is passed to movs as its
 * operand, so it may carry a shift (e.g. "clip a2, a2, asr #20").
 * Clobbers the flags.
 */
        .macro clip dst, src:vararg
        movs      \dst, \src
        it        mi
        movmi     \dst, #0              /* negative -> 0 */
        cmp       \dst, #255
        it        gt
        movgt     \dst, #255            /* above 255 -> 255 */
        .endm

/*
 * aclip dst, src...: dst = (sum of the adds operands in src) clamped to
 * the range 0..255.  src is passed to adds unchanged, so it is a register
 * plus a flexible second operand (e.g. "aclip a2, v1, a2, asr #20").
 * Clobbers the flags.
 */
        .macro aclip dst, src:vararg
        adds      \dst, \src
        it        mi
        movmi     \dst, #0              /* negative -> 0 */
        cmp       \dst, #255
        it        gt
        movgt     \dst, #255            /* above 255 -> 255 */
        .endm

/*
 * idct_col_put_armv5te: column pass for two adjacent columns; results are
 * shifted right by 20 (COL_SHIFT), clamped to 0..255 and stored as bytes
 * (one halfword = two pixels per row) to the destination.
 *
 * In:    a1        = address of the row-0 word of the column pair
 *        [sp]      = destination pointer for these two columns
 *        [sp, #4]  = line size (byte distance between destination rows)
 *        Both stack words are the a1/a2 values pushed by the prologue of
 *        ff_simple_idct_put_armv5te in this file.
 * Out:   destination rows 0..7 written; the destination pointer in the
 *        caller's stack slot is advanced by 2 for the next column pair
 * Clobb: a2, a3, a4, ip, lr, v1-v7, fp, flags; a1 is not modified
 *
 * Stack offsets below: this function pushes 4 bytes and idct_col 32, and
 * 8 have been popped when the slots are accessed, hence entry [sp] is at
 * [sp, #28] and entry [sp, #4] at [sp, #32].
 *
 * NOTE(review): strh_pre / strh_dpre come from asm.S (not shown here);
 * presumably "strh rt, [rn, +/-rm]!", i.e. rn is updated by +rm / -rm
 * before the store -- confirm.  The comments below assume this.
 */
function idct_col_put_armv5te
        str       lr, [sp, #-4]!

        idct_col

        ldmfd     sp!, {a3, a4}         /* a3/a4 = a0 (low/high column) */
        ldr       lr, [sp, #32]         /* lr = line size */
        add       a2, a3, v1            /* row 0 = a0 + b0 */
        clip      a2, a2, asr #20
        add       ip, a4, v2
        clip      ip, ip, asr #20
        orr       a2, a2, ip, lsl #8
        sub       a3, a3, v1            /* row 7 = a0 - b0 */
        clip      a3, a3, asr #20
        sub       a4, a4, v2
        clip      a4, a4, asr #20
        ldr       v1, [sp, #28]         /* v1 = destination pointer */
        strh      a2, [v1]              /* store row 0 */
        add       a2, v1, #2
        str       a2, [sp, #28]         /* advance saved dest by two pixels */
        orr       a2, a3, a4, lsl #8
        rsb       v2, lr, lr, lsl #3    /* v2 = 7 * line size */
        ldmfd     sp!, {a3, a4}         /* a3/a4 = a1 */
        strh_pre  a2, v2, v1            /* row 7; v2 then walks upwards */

        sub       a2, a3, v3            /* row 1 = a1 + b1 (v3/v4 hold -b1) */
        clip      a2, a2, asr #20
        sub       ip, a4, v4
        clip      ip, ip, asr #20
        orr       a2, a2, ip, lsl #8
        strh_pre  a2, v1, lr            /* row 1; v1 walks downwards */
        add       a3, a3, v3            /* row 6 = a1 - b1 */
        clip      a2, a3, asr #20
        add       a4, a4, v4
        clip      a4, a4, asr #20
        orr       a2, a2, a4, lsl #8
        ldmfd     sp!, {a3, a4}         /* a3/a4 = a2 */
        strh_dpre a2, v2, lr            /* row 6 */

        add       a2, a3, v5            /* row 2 = a2 + b2 */
        clip      a2, a2, asr #20
        add       ip, a4, v6
        clip      ip, ip, asr #20
        orr       a2, a2, ip, lsl #8
        strh_pre  a2, v1, lr            /* row 2 */
        sub       a3, a3, v5            /* row 5 = a2 - b2 */
        clip      a2, a3, asr #20
        sub       a4, a4, v6
        clip      a4, a4, asr #20
        orr       a2, a2, a4, lsl #8
        ldmfd     sp!, {a3, a4}         /* a3/a4 = a3 */
        strh_dpre a2, v2, lr            /* row 5 */

        add       a2, a3, v7            /* row 3 = a3 + b3 */
        clip      a2, a2, asr #20
        add       ip, a4, fp
        clip      ip, ip, asr #20
        orr       a2, a2, ip, lsl #8
        strh      a2, [v1, lr]          /* row 3 (no writeback needed) */
        sub       a3, a3, v7            /* row 4 = a3 - b3 */
        clip      a2, a3, asr #20
        sub       a4, a4, fp
        clip      a4, a4, asr #20
        orr       a2, a2, a4, lsl #8
        strh_dpre a2, v2, lr            /* row 4 */

        ldr       pc, [sp], #4
endfunc

/*
 * idct_col_add_armv5te: column pass for two adjacent columns; each result
 * (shifted right by 20 = COL_SHIFT) is added to the byte already present
 * in the destination, clamped to 0..255 and stored back.  One halfword
 * (two pixels) is read and written per row.
 *
 * In:    a1        = address of the row-0 word of the column pair
 *        [sp]      = destination pointer for these two columns
 *        [sp, #4]  = line size (byte distance between destination rows)
 *        Both stack words are the a1/a2 values pushed by the prologue of
 *        ff_simple_idct_add_armv5te in this file.
 * Out:   destination rows 0..7 updated; the destination pointer in the
 *        caller's stack slot is advanced by 2 for the next column pair
 * Clobb: a2, a3, a4, ip, lr, v1-v7, fp, flags; a1 is not modified
 *
 * Stack offsets below: this function pushes 4 bytes and idct_col 32, so
 * entry [sp] is at [sp, #36] before the first pop and at [sp, #28] after
 * it; entry [sp, #4] is at [sp, #32] after the first pop.
 *
 * NOTE(review): ldrh_pre / ldrh_dpre come from asm.S (not shown here);
 * presumably "ldrh rt, [rn, +/-rm]!", i.e. rn is updated by +rm / -rm
 * before the load -- confirm.  The comments below assume this.
 */
function idct_col_add_armv5te
        str       lr, [sp, #-4]!

        idct_col

        ldr       lr, [sp, #36]         /* lr = destination pointer */

        ldmfd     sp!, {a3, a4}         /* a3/a4 = a0 (low/high column) */
        ldrh      ip, [lr]              /* ip = two existing pixels, row 0 */
        add       a2, a3, v1            /* row 0 = a0 + b0 */
        sub       a3, a3, v1            /* row 7 = a0 - b0 */
        and       v1, ip, #255
        aclip     a2, v1, a2, asr #20
        add       v1, a4, v2
        mov       v1, v1, asr #20
        aclip     v1, v1, ip, lsr #8
        orr       a2, a2, v1, lsl #8
        ldr       v1, [sp, #32]         /* v1 = line size */
        sub       a4, a4, v2
        rsb       v2, v1, v1, lsl #3    /* v2 = 7 * line size */
        ldrh_pre  ip, v2, lr            /* existing pixels of row 7;
                                           v2 then walks upwards */
        strh      a2, [lr]              /* store row 0 */
        and       a2, ip, #255
        aclip     a3, a2, a3, asr #20
        mov       a4, a4, asr #20
        aclip     a4, a4, ip, lsr #8
        add       a2, lr, #2
        str       a2, [sp, #28]         /* advance saved dest by two pixels */
        orr       a2, a3, a4, lsl #8
        strh      a2, [v2]              /* store row 7 */

        ldmfd     sp!, {a3, a4}         /* a3/a4 = a1 */
        ldrh_pre  ip, lr, v1            /* existing pixels of row 1;
                                           lr walks downwards */
        sub       a2, a3, v3            /* row 1 = a1 + b1 (v3/v4 hold -b1) */
        add       a3, a3, v3            /* row 6 = a1 - b1 */
        and       v3, ip, #255
        aclip     a2, v3, a2, asr #20
        sub       v3, a4, v4
        mov       v3, v3, asr #20
        aclip     v3, v3, ip, lsr #8
        orr       a2, a2, v3, lsl #8
        add       a4, a4, v4
        ldrh_dpre ip, v2, v1            /* existing pixels of row 6 */
        strh      a2, [lr]              /* store row 1 */
        and       a2, ip, #255
        aclip     a3, a2, a3, asr #20
        mov       a4, a4, asr #20
        aclip     a4, a4, ip, lsr #8
        orr       a2, a3, a4, lsl #8
        strh      a2, [v2]              /* store row 6 */

        ldmfd     sp!, {a3, a4}         /* a3/a4 = a2 */
        ldrh_pre  ip, lr, v1            /* existing pixels of row 2 */
        add       a2, a3, v5            /* row 2 = a2 + b2 */
        sub       a3, a3, v5            /* row 5 = a2 - b2 */
        and       v3, ip, #255
        aclip     a2, v3, a2, asr #20
        add       v3, a4, v6
        mov       v3, v3, asr #20
        aclip     v3, v3, ip, lsr #8
        orr       a2, a2, v3, lsl #8
        sub       a4, a4, v6
        ldrh_dpre ip, v2, v1            /* existing pixels of row 5 */
        strh      a2, [lr]              /* store row 2 */
        and       a2, ip, #255
        aclip     a3, a2, a3, asr #20
        mov       a4, a4, asr #20
        aclip     a4, a4, ip, lsr #8
        orr       a2, a3, a4, lsl #8
        strh      a2, [v2]              /* store row 5 */

        ldmfd     sp!, {a3, a4}         /* a3/a4 = a3 */
        ldrh_pre  ip, lr, v1            /* existing pixels of row 3 */
        add       a2, a3, v7            /* row 3 = a3 + b3 */
        sub       a3, a3, v7            /* row 4 = a3 - b3 */
        and       v3, ip, #255
        aclip     a2, v3, a2, asr #20
        add       v3, a4, fp
        mov       v3, v3, asr #20
        aclip     v3, v3, ip, lsr #8
        orr       a2, a2, v3, lsl #8
        sub       a4, a4, fp
        ldrh_dpre ip, v2, v1            /* existing pixels of row 4 */
        strh      a2, [lr]              /* store row 3 */
        and       a2, ip, #255
        aclip     a3, a2, a3, asr #20
        mov       a4, a4, asr #20
        aclip     a4, a4, ip, lsr #8
        orr       a2, a3, a4, lsl #8
        strh      a2, [v2]              /* store row 4 */

        ldr       pc, [sp], #4
endfunc

/*
 * ff_simple_idct_armv5te: in-place 8x8 IDCT of a block of 16-bit values.
 * Presumably void ff_simple_idct_armv5te(int16_t *data) on the C side --
 * the prototype is not visible in this file, confirm against the caller.
 *
 * In:    a1 = address of the block: 8 rows of 16 bytes each
 * Out:   block overwritten with the transform result
 * Saves: v1-v7, fp (restored on return); a1-a4, ip are not preserved
 *
 * Eight row passes (16 bytes apart) are followed by four column passes,
 * each handling two adjacent columns (4 bytes apart).
 */
function ff_simple_idct_armv5te, export=1
        stmfd     sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}

        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te

        sub       a1, a1, #(16*7)       /* back to the start of the block */

        bl        idct_col_armv5te
        add       a1, a1, #4
        bl        idct_col_armv5te
        add       a1, a1, #4
        bl        idct_col_armv5te
        add       a1, a1, #4
        bl        idct_col_armv5te

        ldmfd     sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
endfunc

/*
 * ff_simple_idct_add_armv5te: 8x8 IDCT of a block of 16-bit values whose
 * result is added to the destination pixels with clamping to 0..255.
 * Presumably (uint8_t *dest, int line_size, int16_t *data) on the C side
 * -- the prototype is not visible in this file, confirm against the caller.
 *
 * In:    a1 = destination pointer
 *        a2 = line size (byte distance between destination rows)
 *        a3 = address of the block: 8 rows of 16 bytes each
 * Out:   destination updated; the block holds the row-pass output
 * Saves: v1-v7, fp (restored on return); a1-a4, ip are not preserved
 *
 * a1 and a2 are pushed first so that they sit at [sp] and [sp, #4] when
 * idct_col_add_armv5te is called; that function reads them from there and
 * advances the stored destination pointer by 2 on every call.
 */
function ff_simple_idct_add_armv5te, export=1
        stmfd     sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov       a1, a3                /* a1 = block for the passes below */

        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te

        sub       a1, a1, #(16*7)       /* back to the start of the block */

        bl        idct_col_add_armv5te
        add       a1, a1, #4
        bl        idct_col_add_armv5te
        add       a1, a1, #4
        bl        idct_col_add_armv5te
        add       a1, a1, #4
        bl        idct_col_add_armv5te

        add       sp, sp, #8            /* drop the saved a1/a2 slots */
        ldmfd     sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
endfunc

/*
 * ff_simple_idct_put_armv5te: 8x8 IDCT of a block of 16-bit values whose
 * result is clamped to 0..255 and written to the destination pixels.
 * Presumably (uint8_t *dest, int line_size, int16_t *data) on the C side
 * -- the prototype is not visible in this file, confirm against the caller.
 *
 * In:    a1 = destination pointer
 *        a2 = line size (byte distance between destination rows)
 *        a3 = address of the block: 8 rows of 16 bytes each
 * Out:   destination written; the block holds the row-pass output
 * Saves: v1-v7, fp (restored on return); a1-a4, ip are not preserved
 *
 * a1 and a2 are pushed first so that they sit at [sp] and [sp, #4] when
 * idct_col_put_armv5te is called; that function reads them from there and
 * advances the stored destination pointer by 2 on every call.
 */
function ff_simple_idct_put_armv5te, export=1
        stmfd     sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov       a1, a3                /* a1 = block for the passes below */

        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te
        add       a1, a1, #16
        bl        idct_row_armv5te

        sub       a1, a1, #(16*7)       /* back to the start of the block */

        bl        idct_col_put_armv5te
        add       a1, a1, #4
        bl        idct_col_put_armv5te
        add       a1, a1, #4
        bl        idct_col_put_armv5te
        add       a1, a1, #4
        bl        idct_col_put_armv5te

        add       sp, sp, #8            /* drop the saved a1/a2 slots */
        ldmfd     sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
endfunc