annotate ffmpeg/libavcodec/arm/simple_idct_armv5te.S @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Simple IDCT
yading@10 3 *
yading@10 4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
yading@10 5 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
yading@10 6 *
yading@10 7 * This file is part of FFmpeg.
yading@10 8 *
yading@10 9 * FFmpeg is free software; you can redistribute it and/or
yading@10 10 * modify it under the terms of the GNU Lesser General Public
yading@10 11 * License as published by the Free Software Foundation; either
yading@10 12 * version 2.1 of the License, or (at your option) any later version.
yading@10 13 *
yading@10 14 * FFmpeg is distributed in the hope that it will be useful,
yading@10 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 17 * Lesser General Public License for more details.
yading@10 18 *
yading@10 19 * You should have received a copy of the GNU Lesser General Public
yading@10 20 * License along with FFmpeg; if not, write to the Free Software
yading@10 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 22 */
yading@10 23
yading@10 24 #include "libavutil/arm/asm.S"
yading@10 25
/*
 * Fixed-point transform coefficients W1..W7 (i is the digit in the name),
 * the two shift amounts, and packed coefficient pairs. A packed pair keeps
 * the first coefficient in bits 0-15 and the second in bits 16-31, so one
 * register supplies two multipliers to the halfword multiplies (smulxy /
 * smlaxy) used below.
 */
yading@10 26 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
yading@10 27 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
yading@10 28 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
yading@10 29 #define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 would give 16384; the value used is one less (NOTE(review): presumably deliberate, to match the C version - confirm) */
yading@10 30 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
yading@10 31 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
yading@10 32 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
yading@10 33 #define ROW_SHIFT 11 /* the row pass rounds with 1<<(ROW_SHIFT-1); its shifts are written as literal 11 */
yading@10 34 #define COL_SHIFT 20 /* the column pass rounds with (1<<(COL_SHIFT-1))/W4; its shifts are written as literal 20 */
yading@10 35
yading@10 36 #define W13 (W1 | (W3 << 16)) /* bottom = W1, top = W3 */
yading@10 37 #define W26 (W2 | (W6 << 16)) /* bottom = W2, top = W6 */
yading@10 38 #define W57 (W5 | (W7 << 16)) /* bottom = W5, top = W7 */
yading@10 39
/*
 * idct_row_armv5te: in-place row pass over one row of eight 16-bit
 * coefficients.
 *
 * In:     a1 = address of the row (16 bytes, accessed with ldrd/strd)
 * Out:    the 16 bytes at [a1] are overwritten with the eight results
 * Writes: a2-a4, v1-v7, fp, ip, lr and the flags; a1 is left unchanged
 * Stack:  lr is pushed on entry and popped straight into pc to return
 *
 * The internal label row_dc_only handles a row whose elements 1..7 are
 * all zero.
 */
yading@10 40 function idct_row_armv5te
yading@10 41 str lr, [sp, #-4]! /* push return address; lr is reused as scratch below */
yading@10 42
yading@10 43 ldrd v1, v2, [a1, #8] /* v1 = row[5:4], v2 = row[7:6] */
yading@10 44 ldrd a3, a4, [a1] /* a3 = row[1:0], a4 = row[3:2] */
yading@10 45 orrs v1, v1, v2 /* Z set iff row[4..7] are all zero */
yading@10 46 itt eq
yading@10 47 cmpeq v1, a4 /* v1 is 0 here: also require row[3:2] == 0 */
yading@10 48 cmpeq v1, a3, lsr #16 /* ...and row[1] == 0 */
yading@10 49 beq row_dc_only /* taken when only row[0] can be non-zero */
yading@10 50
yading@10 51 mov v1, #(1<<(ROW_SHIFT-1)) /* rounding constant */
yading@10 52 mov ip, #16384
yading@10 53 sub ip, ip, #1 /* ip = W4 */
yading@10 54 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */
yading@10 55 ldr ip, =W26 /* ip = W2 | (W6 << 16) */
yading@10 56 smultb a2, ip, a4 /* a2 = W6*row[2] */
yading@10 57 smulbb lr, ip, a4 /* lr = W2*row[2] */
yading@10 58 add v2, v1, a2 /* v2 = v1 + W6*row[2] */
yading@10 59 sub v3, v1, a2 /* v3 = v1 - W6*row[2] */
yading@10 60 sub v4, v1, lr /* v4 = v1 - W2*row[2] */
yading@10 61 add v1, v1, lr /* v1 = v1 + W2*row[2] */
yading@10 62
yading@10 63 ldr ip, =W13 /* ip = W1 | (W3 << 16) */
yading@10 64 ldr lr, =W57 /* lr = W5 | (W7 << 16) */
yading@10 65 smulbt v5, ip, a3 /* v5 = W1*row[1] */
yading@10 66 smultt v6, lr, a4 /* v6 = W7*row[3] */
yading@10 67 smlatt v5, ip, a4, v5 /* v5 += W3*row[3] */
yading@10 68 smultt a2, ip, a3 /* a2 = W3*row[1] */
yading@10 69 smulbt v7, lr, a3 /* v7 = W5*row[1] */
yading@10 70 sub v6, v6, a2 /* v6 = W7*row[3] - W3*row[1] */
yading@10 71 smulbt a2, ip, a4 /* a2 = W1*row[3] */
yading@10 72 smultt fp, lr, a3 /* fp = W7*row[1] */
yading@10 73 sub v7, v7, a2 /* v7 = W5*row[1] - W1*row[3] */
yading@10 74 smulbt a2, lr, a4 /* a2 = W5*row[3] */
yading@10 75 ldrd a3, a4, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
yading@10 76 sub fp, fp, a2 /* fp = W7*row[1] - W5*row[3] */
yading@10 77
yading@10 78 orrs a2, a3, a4 /* row[4..7] all zero? */
yading@10 79 beq 1f /* then skip their contributions */
yading@10 80
yading@10 81 smlabt v5, lr, a3, v5 /* v5 += W5*row[5] */
yading@10 82 smlabt v6, ip, a3, v6 /* v6 += W1*row[5] */
yading@10 83 smlatt v5, lr, a4, v5 /* v5 += W7*row[7] */
yading@10 84 smlabt v6, lr, a4, v6 /* v6 += W5*row[7] */
yading@10 85 smlatt v7, lr, a3, v7 /* v7 += W7*row[5] */
yading@10 86 smlatt fp, ip, a3, fp /* fp += W3*row[5] */
yading@10 87 smulbt a2, ip, a4 /* a2 = W1*row[7] */
yading@10 88 smlatt v7, ip, a4, v7 /* v7 += W3*row[7] */
yading@10 89 sub fp, fp, a2 /* fp -= W1*row[7] */
yading@10 90
yading@10 91 ldr ip, =W26 /* ip = W2 | (W6 << 16) */
yading@10 92 mov a2, #16384
yading@10 93 sub a2, a2, #1 /* a2 = W4 */
yading@10 94 smulbb a2, a2, a3 /* a2 = W4*row[4] */
yading@10 95 smultb lr, ip, a4 /* lr = W6*row[6] */
yading@10 96 add v1, v1, a2 /* v1 += W4*row[4] */
yading@10 97 add v1, v1, lr /* v1 += W6*row[6] */
yading@10 98 add v4, v4, a2 /* v4 += W4*row[4] */
yading@10 99 sub v4, v4, lr /* v4 -= W6*row[6] */
yading@10 100 smulbb lr, ip, a4 /* lr = W2*row[6] */
yading@10 101 sub v2, v2, a2 /* v2 -= W4*row[4] */
yading@10 102 sub v2, v2, lr /* v2 -= W2*row[6] */
yading@10 103 sub v3, v3, a2 /* v3 -= W4*row[4] */
yading@10 104 add v3, v3, lr /* v3 += W2*row[6] */
yading@10 105
yading@10 106 1: add a2, v1, v5 /* result 0 = v1 + v5 */
yading@10 107 mov a3, a2, lsr #11
yading@10 108 bic a3, a3, #0x1f0000 /* drop bits 16-20 so only the low halfword remains */
yading@10 109 sub a2, v2, v6 /* result 1 = v2 - v6 */
yading@10 110 mov a2, a2, lsr #11
yading@10 111 add a3, a3, a2, lsl #16 /* a3 = result 1 : result 0 */
yading@10 112 add a2, v3, v7 /* result 2 = v3 + v7 */
yading@10 113 mov a4, a2, lsr #11
yading@10 114 bic a4, a4, #0x1f0000 /* keep the low halfword only */
yading@10 115 add a2, v4, fp /* result 3 = v4 + fp */
yading@10 116 mov a2, a2, lsr #11
yading@10 117 add a4, a4, a2, lsl #16 /* a4 = result 3 : result 2 */
yading@10 118 strd a3, a4, [a1] /* store results 0..3 */
yading@10 119
yading@10 120 sub a2, v4, fp /* result 4 = v4 - fp */
yading@10 121 mov a3, a2, lsr #11
yading@10 122 bic a3, a3, #0x1f0000 /* keep the low halfword only */
yading@10 123 sub a2, v3, v7 /* result 5 = v3 - v7 */
yading@10 124 mov a2, a2, lsr #11
yading@10 125 add a3, a3, a2, lsl #16 /* a3 = result 5 : result 4 */
yading@10 126 add a2, v2, v6 /* result 6 = v2 + v6 */
yading@10 127 mov a4, a2, lsr #11
yading@10 128 bic a4, a4, #0x1f0000 /* keep the low halfword only */
yading@10 129 sub a2, v1, v5 /* result 7 = v1 - v5 */
yading@10 130 mov a2, a2, lsr #11
yading@10 131 add a4, a4, a2, lsl #16 /* a4 = result 7 : result 6 */
yading@10 132 strd a3, a4, [a1, #8] /* store results 4..7 */
yading@10 133
yading@10 134 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
yading@10 135
yading@10 136 row_dc_only:
yading@10 137 orr a3, a3, a3, lsl #16 /* row[1] is 0 here, so this copies row[0] into both halfwords */
yading@10 138 bic a3, a3, #0xe000 /* clear bits 13-15 so the shift below cannot spill into the top halfword */
yading@10 139 mov a3, a3, lsl #3 /* each halfword = low 16 bits of (row[0] << 3) */
yading@10 140 mov a4, a3
yading@10 141 strd a3, a4, [a1] /* write that value to results 0..3 */
yading@10 142 strd a3, a4, [a1, #8] /* ...and to results 4..7 */
yading@10 143
yading@10 144 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
yading@10 145 endfunc
yading@10 146
/*
 * idct_col: shared body of the column pass. It processes two adjacent
 * columns at once. Notation used in the comments below: rK is the 32-bit
 * word at [a1 + 16*K]; rK[0] is its bottom halfword (first column) and
 * rK[1] its top halfword (second column).
 *
 * In:  a1 = address of r0 (not modified)
 * On exit:
 *   - eight words have been pushed with stmfd, so sp is 32 bytes lower.
 *     From sp upwards they are the sums built from r0, r4, r2 and r6:
 *     the values held in v1, v2, v3, v4, v5, v6, v7, fp at the time of
 *     the push (odd-numbered registers: first column; v2, v4, v6, fp:
 *     second column);
 *   - v1/v2, v3/v4, v5/v6 and v7/fp hold the sums built from r1, r3, r5
 *     and r7 for the first/second column.
 * Also writes a2, a3, a4, ip and lr. The code that expands the macro is
 * responsible for popping the 32 bytes.
 */
yading@10 147 .macro idct_col
yading@10 148 ldr a4, [a1] /* a4 = col[1:0] */
yading@10 149 mov ip, #16384
yading@10 150 sub ip, ip, #1 /* ip = W4 */
yading@10 151 #if 0
yading@10 152 mov v1, #(1<<(COL_SHIFT-1))
yading@10 153 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
yading@10 154 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
yading@10 155 ldr a4, [a1, #(16*4)]
yading@10 156 #else
yading@10 157 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
yading@10 158 add v2, v1, a4, asr #16 /* v2 = col[1] + rounding term */
yading@10 159 rsb v2, v2, v2, lsl #14 /* v2 = v2*16384 - v2 = v2*W4 */
yading@10 160 mov a4, a4, lsl #16 /* move col[0] to the top so asr sign-extends it */
yading@10 161 add v1, v1, a4, asr #16 /* v1 = col[0] + rounding term */
yading@10 162 ldr a4, [a1, #(16*4)] /* a4 = r4 */
yading@10 163 rsb v1, v1, v1, lsl #14 /* v1 = v1*W4 */
yading@10 164 #endif
yading@10 165
yading@10 166 smulbb lr, ip, a4 /* lr = W4*r4[0] */
yading@10 167 smulbt a3, ip, a4 /* a3 = W4*r4[1] */
yading@10 168 sub v3, v1, lr /* first column: v3 = v5 = v1 - W4*r4[0] */
yading@10 169 sub v5, v1, lr
yading@10 170 add v7, v1, lr /* first column: v7 = v1 = v1 + W4*r4[0] */
yading@10 171 add v1, v1, lr
yading@10 172 sub v4, v2, a3 /* second column: v4 = v6 = v2 - W4*r4[1] */
yading@10 173 sub v6, v2, a3
yading@10 174 add fp, v2, a3 /* second column: fp = v2 = v2 + W4*r4[1] */
yading@10 175 ldr ip, =W26 /* ip = W2 | (W6 << 16) */
yading@10 176 ldr a4, [a1, #(16*2)] /* a4 = r2 */
yading@10 177 add v2, v2, a3
yading@10 178
yading@10 179 smulbb lr, ip, a4 /* lr = W2*r2[0] */
yading@10 180 smultb a3, ip, a4 /* a3 = W6*r2[0] */
yading@10 181 add v1, v1, lr /* v1 += W2*r2[0] */
yading@10 182 sub v7, v7, lr /* v7 -= W2*r2[0] */
yading@10 183 add v3, v3, a3 /* v3 += W6*r2[0] */
yading@10 184 sub v5, v5, a3 /* v5 -= W6*r2[0] */
yading@10 185 smulbt lr, ip, a4 /* lr = W2*r2[1] */
yading@10 186 smultt a3, ip, a4 /* a3 = W6*r2[1] */
yading@10 187 add v2, v2, lr /* v2 += W2*r2[1] */
yading@10 188 sub fp, fp, lr /* fp -= W2*r2[1] */
yading@10 189 add v4, v4, a3 /* v4 += W6*r2[1] */
yading@10 190 ldr a4, [a1, #(16*6)] /* a4 = r6 */
yading@10 191 sub v6, v6, a3 /* v6 -= W6*r2[1] */
yading@10 192
yading@10 193 smultb lr, ip, a4 /* lr = W6*r6[0] */
yading@10 194 smulbb a3, ip, a4 /* a3 = W2*r6[0] */
yading@10 195 add v1, v1, lr /* v1 += W6*r6[0] */
yading@10 196 sub v7, v7, lr /* v7 -= W6*r6[0] */
yading@10 197 sub v3, v3, a3 /* v3 -= W2*r6[0] */
yading@10 198 add v5, v5, a3 /* v5 += W2*r6[0] */
yading@10 199 smultt lr, ip, a4 /* lr = W6*r6[1] */
yading@10 200 smulbt a3, ip, a4 /* a3 = W2*r6[1] */
yading@10 201 add v2, v2, lr /* v2 += W6*r6[1] */
yading@10 202 sub fp, fp, lr /* fp -= W6*r6[1] */
yading@10 203 sub v4, v4, a3 /* v4 -= W2*r6[1] */
yading@10 204 add v6, v6, a3 /* v6 += W2*r6[1] */
yading@10 205
yading@10 206 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* park the eight sums on the stack; the registers are reused below */
yading@10 207
yading@10 208 ldr ip, =W13 /* ip = W1 | (W3 << 16) */
yading@10 209 ldr a4, [a1, #(16*1)] /* a4 = r1 */
yading@10 210 ldr lr, =W57 /* lr = W5 | (W7 << 16) */
yading@10 211 smulbb v1, ip, a4 /* v1 = W1*r1[0] */
yading@10 212 smultb v3, ip, a4 /* v3 = W3*r1[0] */
yading@10 213 smulbb v5, lr, a4 /* v5 = W5*r1[0] */
yading@10 214 smultb v7, lr, a4 /* v7 = W7*r1[0] */
yading@10 215 smulbt v2, ip, a4 /* v2 = W1*r1[1] */
yading@10 216 smultt v4, ip, a4 /* v4 = W3*r1[1] */
yading@10 217 smulbt v6, lr, a4 /* v6 = W5*r1[1] */
yading@10 218 smultt fp, lr, a4 /* fp = W7*r1[1] */
yading@10 219 rsb v4, v4, #0 /* v4 = -W3*r1[1] */
yading@10 220 ldr a4, [a1, #(16*3)] /* a4 = r3 */
yading@10 221 rsb v3, v3, #0 /* v3 = -W3*r1[0] */
yading@10 222
yading@10 223 smlatb v1, ip, a4, v1 /* v1 += W3*r3[0] */
yading@10 224 smlatb v3, lr, a4, v3 /* v3 += W7*r3[0] */
yading@10 225 smulbb a3, ip, a4 /* a3 = W1*r3[0] */
yading@10 226 smulbb a2, lr, a4 /* a2 = W5*r3[0] */
yading@10 227 sub v5, v5, a3 /* v5 -= W1*r3[0] */
yading@10 228 sub v7, v7, a2 /* v7 -= W5*r3[0] */
yading@10 229 smlatt v2, ip, a4, v2 /* v2 += W3*r3[1] */
yading@10 230 smlatt v4, lr, a4, v4 /* v4 += W7*r3[1] */
yading@10 231 smulbt a3, ip, a4 /* a3 = W1*r3[1] */
yading@10 232 smulbt a2, lr, a4 /* a2 = W5*r3[1] */
yading@10 233 sub v6, v6, a3 /* v6 -= W1*r3[1] */
yading@10 234 ldr a4, [a1, #(16*5)] /* a4 = r5 */
yading@10 235 sub fp, fp, a2 /* fp -= W5*r3[1] */
yading@10 236
yading@10 237 smlabb v1, lr, a4, v1 /* v1 += W5*r5[0] */
yading@10 238 smlabb v3, ip, a4, v3 /* v3 += W1*r5[0] */
yading@10 239 smlatb v5, lr, a4, v5 /* v5 += W7*r5[0] */
yading@10 240 smlatb v7, ip, a4, v7 /* v7 += W3*r5[0] */
yading@10 241 smlabt v2, lr, a4, v2 /* v2 += W5*r5[1] */
yading@10 242 smlabt v4, ip, a4, v4 /* v4 += W1*r5[1] */
yading@10 243 smlatt v6, lr, a4, v6 /* v6 += W7*r5[1] */
yading@10 244 ldr a3, [a1, #(16*7)] /* a3 = r7 */
yading@10 245 smlatt fp, ip, a4, fp /* fp += W3*r5[1] */
yading@10 246
yading@10 247 smlatb v1, lr, a3, v1 /* v1 += W7*r7[0] */
yading@10 248 smlabb v3, lr, a3, v3 /* v3 += W5*r7[0] */
yading@10 249 smlatb v5, ip, a3, v5 /* v5 += W3*r7[0] */
yading@10 250 smulbb a4, ip, a3 /* a4 = W1*r7[0] */
yading@10 251 smlatt v2, lr, a3, v2 /* v2 += W7*r7[1] */
yading@10 252 sub v7, v7, a4 /* v7 -= W1*r7[0] */
yading@10 253 smlabt v4, lr, a3, v4 /* v4 += W5*r7[1] */
yading@10 254 smulbt a4, ip, a3 /* a4 = W1*r7[1] */
yading@10 255 smlatt v6, ip, a3, v6 /* v6 += W3*r7[1] */
yading@10 256 sub fp, fp, a4 /* fp -= W1*r7[1] */
yading@10 257 .endm
yading@10 258
/*
 * idct_col_armv5te: column pass for two adjacent columns, written back in
 * place as 16-bit values.
 *
 * In:     a1 = address of the word holding row 0 of both columns; the
 *         other rows are read and written at a1 + 16*k
 * Out:    each of the eight row words is replaced by two results, each
 *         (sum >> 20) truncated to 16 bits: first column in the bottom
 *         halfword, second column in the top halfword
 * Writes: a2-a4, v1-v7, fp, ip, lr and the flags; a1 is left unchanged
 *
 * After idct_col the stack holds four (first column, second column) pairs
 * of stacked sums; each pair is popped into a3/a4 and combined with one
 * register pair by addition and by subtraction, giving two output rows.
 */
yading@10 259 function idct_col_armv5te
yading@10 260 str lr, [sp, #-4]! /* push return address; idct_col uses lr as scratch */
yading@10 261
yading@10 262 idct_col
yading@10 263
yading@10 264 ldmfd sp!, {a3, a4} /* pop first stacked pair: a3 = first column, a4 = second column */
yading@10 265 adds a2, a3, v1 /* row 0, first column; N flag = sign of the sum */
yading@10 266 mov a2, a2, lsr #20 /* 12 result bits, zeros above (flags untouched) */
yading@10 267 it mi
yading@10 268 orrmi a2, a2, #0xf000 /* negative sum: set bits 12-15 to sign-extend to 16 bits */
yading@10 269 add ip, a4, v2 /* row 0, second column */
yading@10 270 mov ip, ip, asr #20
yading@10 271 orr a2, a2, ip, lsl #16 /* second column goes in the top halfword */
yading@10 272 str a2, [a1] /* store row 0 */
yading@10 273 subs a3, a3, v1 /* row 7, first column */
yading@10 274 mov a2, a3, lsr #20
yading@10 275 it mi
yading@10 276 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 277 sub a4, a4, v2 /* row 7, second column */
yading@10 278 mov a4, a4, asr #20
yading@10 279 orr a2, a2, a4, lsl #16
yading@10 280 ldmfd sp!, {a3, a4} /* pop second stacked pair */
yading@10 281 str a2, [a1, #(16*7)] /* store row 7 */
yading@10 282
yading@10 283 subs a2, a3, v3 /* row 1, first column */
yading@10 284 mov a2, a2, lsr #20
yading@10 285 it mi
yading@10 286 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 287 sub ip, a4, v4 /* row 1, second column */
yading@10 288 mov ip, ip, asr #20
yading@10 289 orr a2, a2, ip, lsl #16
yading@10 290 str a2, [a1, #(16*1)] /* store row 1 */
yading@10 291 adds a3, a3, v3 /* row 6, first column */
yading@10 292 mov a2, a3, lsr #20
yading@10 293 it mi
yading@10 294 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 295 add a4, a4, v4 /* row 6, second column */
yading@10 296 mov a4, a4, asr #20
yading@10 297 orr a2, a2, a4, lsl #16
yading@10 298 ldmfd sp!, {a3, a4} /* pop third stacked pair */
yading@10 299 str a2, [a1, #(16*6)] /* store row 6 */
yading@10 300
yading@10 301 adds a2, a3, v5 /* row 2, first column */
yading@10 302 mov a2, a2, lsr #20
yading@10 303 it mi
yading@10 304 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 305 add ip, a4, v6 /* row 2, second column */
yading@10 306 mov ip, ip, asr #20
yading@10 307 orr a2, a2, ip, lsl #16
yading@10 308 str a2, [a1, #(16*2)] /* store row 2 */
yading@10 309 subs a3, a3, v5 /* row 5, first column */
yading@10 310 mov a2, a3, lsr #20
yading@10 311 it mi
yading@10 312 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 313 sub a4, a4, v6 /* row 5, second column */
yading@10 314 mov a4, a4, asr #20
yading@10 315 orr a2, a2, a4, lsl #16
yading@10 316 ldmfd sp!, {a3, a4} /* pop fourth (last) stacked pair */
yading@10 317 str a2, [a1, #(16*5)] /* store row 5 */
yading@10 318
yading@10 319 adds a2, a3, v7 /* row 3, first column */
yading@10 320 mov a2, a2, lsr #20
yading@10 321 it mi
yading@10 322 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 323 add ip, a4, fp /* row 3, second column */
yading@10 324 mov ip, ip, asr #20
yading@10 325 orr a2, a2, ip, lsl #16
yading@10 326 str a2, [a1, #(16*3)] /* store row 3 */
yading@10 327 subs a3, a3, v7 /* row 4, first column */
yading@10 328 mov a2, a3, lsr #20
yading@10 329 it mi
yading@10 330 orrmi a2, a2, #0xf000 /* sign-extend to 16 bits */
yading@10 331 sub a4, a4, fp /* row 4, second column */
yading@10 332 mov a4, a4, asr #20
yading@10 333 orr a2, a2, a4, lsl #16
yading@10 334 str a2, [a1, #(16*4)] /* store row 4 */
yading@10 335
yading@10 336 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
yading@10 337 endfunc
yading@10 338
/*
 * clip dst, src: dst = src clamped to 0..255.
 * src is everything after dst (vararg), so it may be a register with a
 * shift, e.g. "clip a2, a2, asr #20". Changes the flags.
 */
yading@10 339 .macro clip dst, src:vararg
yading@10 340 movs \dst, \src /* dst = src; N flag = sign */
yading@10 341 it mi
yading@10 342 movmi \dst, #0 /* negative -> 0 */
yading@10 343 cmp \dst, #255
yading@10 344 it gt
yading@10 345 movgt \dst, #255 /* above 255 -> 255 */
yading@10 346 .endm
yading@10 347
/*
 * aclip dst, src: add-and-clip. src (vararg) supplies both operands of
 * the adds, e.g. "aclip a2, v1, a2, asr #20" gives
 * dst = v1 + (a2 >> 20), which is then clamped to 0..255.
 * Changes the flags.
 */
yading@10 348 .macro aclip dst, src:vararg
yading@10 349 adds \dst, \src /* dst = sum; N flag = sign */
yading@10 350 it mi
yading@10 351 movmi \dst, #0 /* negative -> 0 */
yading@10 352 cmp \dst, #255
yading@10 353 it gt
yading@10 354 movgt \dst, #255 /* above 255 -> 255 */
yading@10 355 .endm
yading@10 356
/*
 * idct_col_put_armv5te: column pass for two adjacent columns. Each result
 * is shifted right by 20, clamped to 0..255 with clip, and the two bytes
 * of a row (first column in the low byte) are stored with one halfword
 * store.
 *
 * In:     a1 = address of the word holding row 0 of both columns
 *         Two words left by the caller directly above this function's
 *         saved lr (ff_simple_idct_put_armv5te pushes its a1 and a2
 *         there): the output address for this column pair and the byte
 *         distance between output rows.
 * Out:    eight halfwords written to the output; the stacked output
 *         address is advanced by 2 for the next call
 * Writes: a2-a4, v1-v7, fp, ip, lr and the flags; a1 is left unchanged
 *
 * NOTE(review): strh_pre and strh_dpre are not defined in this file
 * (presumably they come from the included asm.S). The row numbers in the
 * comments below assume they add (strh_pre) or subtract (strh_dpre) the
 * third operand to/from the base register, write the base back, and
 * store the halfword there - confirm against their definition.
 */
yading@10 357 function idct_col_put_armv5te
yading@10 358 str lr, [sp, #-4]! /* push return address; lr is reused below */
yading@10 359
yading@10 360 idct_col
yading@10 361
yading@10 362 ldmfd sp!, {a3, a4} /* pop first stacked pair: a3 = first column, a4 = second column */
yading@10 363 ldr lr, [sp, #32] /* lr = caller's stacked a2 (row distance of the output) */
yading@10 364 add a2, a3, v1 /* row 0, first column */
yading@10 365 clip a2, a2, asr #20
yading@10 366 add ip, a4, v2 /* row 0, second column */
yading@10 367 clip ip, ip, asr #20
yading@10 368 orr a2, a2, ip, lsl #8 /* a2 = both row-0 bytes */
yading@10 369 sub a3, a3, v1 /* row 7, first column */
yading@10 370 clip a3, a3, asr #20
yading@10 371 sub a4, a4, v2 /* row 7, second column */
yading@10 372 clip a4, a4, asr #20
yading@10 373 ldr v1, [sp, #28] /* v1 = caller's stacked a1 (output address) */
yading@10 374 strh a2, [v1] /* store row 0 */
yading@10 375 add a2, v1, #2
yading@10 376 str a2, [sp, #28] /* stacked output address += 2 for the next call */
yading@10 377 orr a2, a3, a4, lsl #8 /* a2 = both row-7 bytes */
yading@10 378 rsb v2, lr, lr, lsl #3 /* v2 = 7 * row distance */
yading@10 379 ldmfd sp!, {a3, a4} /* pop second stacked pair */
yading@10 380 strh_pre a2, v2, v1 /* row 7 (v2 = output + 7 rows, see NOTE above) */
yading@10 381
yading@10 382 sub a2, a3, v3 /* row 1, first column */
yading@10 383 clip a2, a2, asr #20
yading@10 384 sub ip, a4, v4 /* row 1, second column */
yading@10 385 clip ip, ip, asr #20
yading@10 386 orr a2, a2, ip, lsl #8
yading@10 387 strh_pre a2, v1, lr /* row 1 (v1 moves down one row) */
yading@10 388 add a3, a3, v3 /* row 6, first column */
yading@10 389 clip a2, a3, asr #20
yading@10 390 add a4, a4, v4 /* row 6, second column */
yading@10 391 clip a4, a4, asr #20
yading@10 392 orr a2, a2, a4, lsl #8
yading@10 393 ldmfd sp!, {a3, a4} /* pop third stacked pair */
yading@10 394 strh_dpre a2, v2, lr /* row 6 (v2 moves up one row) */
yading@10 395
yading@10 396 add a2, a3, v5 /* row 2, first column */
yading@10 397 clip a2, a2, asr #20
yading@10 398 add ip, a4, v6 /* row 2, second column */
yading@10 399 clip ip, ip, asr #20
yading@10 400 orr a2, a2, ip, lsl #8
yading@10 401 strh_pre a2, v1, lr /* row 2 */
yading@10 402 sub a3, a3, v5 /* row 5, first column */
yading@10 403 clip a2, a3, asr #20
yading@10 404 sub a4, a4, v6 /* row 5, second column */
yading@10 405 clip a4, a4, asr #20
yading@10 406 orr a2, a2, a4, lsl #8
yading@10 407 ldmfd sp!, {a3, a4} /* pop fourth (last) stacked pair */
yading@10 408 strh_dpre a2, v2, lr /* row 5 */
yading@10 409
yading@10 410 add a2, a3, v7 /* row 3, first column */
yading@10 411 clip a2, a2, asr #20
yading@10 412 add ip, a4, fp /* row 3, second column */
yading@10 413 clip ip, ip, asr #20
yading@10 414 orr a2, a2, ip, lsl #8
yading@10 415 strh a2, [v1, lr] /* row 3: one row below v1, no writeback */
yading@10 416 sub a3, a3, v7 /* row 4, first column */
yading@10 417 clip a2, a3, asr #20
yading@10 418 sub a4, a4, fp /* row 4, second column */
yading@10 419 clip a4, a4, asr #20
yading@10 420 orr a2, a2, a4, lsl #8
yading@10 421 strh_dpre a2, v2, lr /* row 4 */
yading@10 422
yading@10 423 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
yading@10 424 endfunc
yading@10 425
/*
 * idct_col_add_armv5te: column pass for two adjacent columns. Each result
 * is shifted right by 20, added to the byte already present in the output
 * and clamped to 0..255 with aclip; the two bytes of a row (first column
 * in the low byte) are written back with one halfword store.
 *
 * In:     a1 = address of the word holding row 0 of both columns
 *         Two words left by the caller directly above this function's
 *         saved lr (ff_simple_idct_add_armv5te pushes its a1 and a2
 *         there): the output address for this column pair and the byte
 *         distance between output rows.
 * Out:    eight halfwords of the output updated; the stacked output
 *         address is advanced by 2 for the next call
 * Writes: a2-a4, v1-v7, fp, ip, lr and the flags; a1 is left unchanged
 *
 * NOTE(review): ldrh_pre and ldrh_dpre are not defined in this file
 * (presumably they come from the included asm.S). The row numbers in the
 * comments below assume they add (ldrh_pre) or subtract (ldrh_dpre) the
 * third operand to/from the base register, write the base back, and load
 * the halfword from there - confirm against their definition.
 */
yading@10 426 function idct_col_add_armv5te
yading@10 427 str lr, [sp, #-4]! /* push return address; lr is reused below */
yading@10 428
yading@10 429 idct_col
yading@10 430
yading@10 431 ldr lr, [sp, #36] /* lr = caller's stacked a1 (output address); offset is relative to sp before the pop below */
yading@10 432
yading@10 433 ldmfd sp!, {a3, a4} /* pop first stacked pair: a3 = first column, a4 = second column */
yading@10 434 ldrh ip, [lr] /* ip = the two existing bytes of output row 0 */
yading@10 435 add a2, a3, v1 /* row 0, first column */
yading@10 436 sub a3, a3, v1 /* row 7, first column */
yading@10 437 and v1, ip, #255 /* v1 (now free) = existing first-column byte */
yading@10 438 aclip a2, v1, a2, asr #20 /* a2 = clamp(byte + (a2 >> 20)) */
yading@10 439 add v1, a4, v2 /* row 0, second column */
yading@10 440 mov v1, v1, asr #20
yading@10 441 aclip v1, v1, ip, lsr #8 /* add the existing second-column byte and clamp */
yading@10 442 orr a2, a2, v1, lsl #8 /* a2 = both row-0 bytes */
yading@10 443 ldr v1, [sp, #32] /* v1 = caller's stacked a2 (row distance of the output) */
yading@10 444 sub a4, a4, v2 /* row 7, second column */
yading@10 445 rsb v2, v1, v1, lsl #3 /* v2 = 7 * row distance */
yading@10 446 ldrh_pre ip, v2, lr /* ip = existing bytes of row 7 (v2 = output + 7 rows, see NOTE above) */
yading@10 447 strh a2, [lr] /* store row 0 */
yading@10 448 and a2, ip, #255
yading@10 449 aclip a3, a2, a3, asr #20 /* row 7, first column: add and clamp */
yading@10 450 mov a4, a4, asr #20
yading@10 451 aclip a4, a4, ip, lsr #8 /* row 7, second column: add and clamp */
yading@10 452 add a2, lr, #2
yading@10 453 str a2, [sp, #28] /* stacked output address += 2 for the next call */
yading@10 454 orr a2, a3, a4, lsl #8
yading@10 455 strh a2, [v2] /* store row 7 */
yading@10 456
yading@10 457 ldmfd sp!, {a3, a4} /* pop second stacked pair */
yading@10 458 ldrh_pre ip, lr, v1 /* lr moves down to row 1; ip = its existing bytes */
yading@10 459 sub a2, a3, v3 /* row 1, first column */
yading@10 460 add a3, a3, v3 /* row 6, first column */
yading@10 461 and v3, ip, #255 /* v3 (now free) = existing first-column byte */
yading@10 462 aclip a2, v3, a2, asr #20
yading@10 463 sub v3, a4, v4 /* row 1, second column */
yading@10 464 mov v3, v3, asr #20
yading@10 465 aclip v3, v3, ip, lsr #8
yading@10 466 orr a2, a2, v3, lsl #8
yading@10 467 add a4, a4, v4 /* row 6, second column */
yading@10 468 ldrh_dpre ip, v2, v1 /* v2 moves up to row 6; ip = its existing bytes */
yading@10 469 strh a2, [lr] /* store row 1 */
yading@10 470 and a2, ip, #255
yading@10 471 aclip a3, a2, a3, asr #20
yading@10 472 mov a4, a4, asr #20
yading@10 473 aclip a4, a4, ip, lsr #8
yading@10 474 orr a2, a3, a4, lsl #8
yading@10 475 strh a2, [v2] /* store row 6 */
yading@10 476
yading@10 477 ldmfd sp!, {a3, a4} /* pop third stacked pair */
yading@10 478 ldrh_pre ip, lr, v1 /* lr moves down to row 2; ip = its existing bytes */
yading@10 479 add a2, a3, v5 /* row 2, first column */
yading@10 480 sub a3, a3, v5 /* row 5, first column */
yading@10 481 and v3, ip, #255 /* v3 is scratch from here on */
yading@10 482 aclip a2, v3, a2, asr #20
yading@10 483 add v3, a4, v6 /* row 2, second column */
yading@10 484 mov v3, v3, asr #20
yading@10 485 aclip v3, v3, ip, lsr #8
yading@10 486 orr a2, a2, v3, lsl #8
yading@10 487 sub a4, a4, v6 /* row 5, second column */
yading@10 488 ldrh_dpre ip, v2, v1 /* v2 moves up to row 5; ip = its existing bytes */
yading@10 489 strh a2, [lr] /* store row 2 */
yading@10 490 and a2, ip, #255
yading@10 491 aclip a3, a2, a3, asr #20
yading@10 492 mov a4, a4, asr #20
yading@10 493 aclip a4, a4, ip, lsr #8
yading@10 494 orr a2, a3, a4, lsl #8
yading@10 495 strh a2, [v2] /* store row 5 */
yading@10 496
yading@10 497 ldmfd sp!, {a3, a4} /* pop fourth (last) stacked pair */
yading@10 498 ldrh_pre ip, lr, v1 /* lr moves down to row 3; ip = its existing bytes */
yading@10 499 add a2, a3, v7 /* row 3, first column */
yading@10 500 sub a3, a3, v7 /* row 4, first column */
yading@10 501 and v3, ip, #255
yading@10 502 aclip a2, v3, a2, asr #20
yading@10 503 add v3, a4, fp /* row 3, second column */
yading@10 504 mov v3, v3, asr #20
yading@10 505 aclip v3, v3, ip, lsr #8
yading@10 506 orr a2, a2, v3, lsl #8
yading@10 507 sub a4, a4, fp /* row 4, second column */
yading@10 508 ldrh_dpre ip, v2, v1 /* v2 moves up to row 4; ip = its existing bytes */
yading@10 509 strh a2, [lr] /* store row 3 */
yading@10 510 and a2, ip, #255
yading@10 511 aclip a3, a2, a3, asr #20
yading@10 512 mov a4, a4, asr #20
yading@10 513 aclip a4, a4, ip, lsr #8
yading@10 514 orr a2, a3, a4, lsl #8
yading@10 515 strh a2, [v2] /* store row 4 */
yading@10 516
yading@10 517 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
yading@10 518 endfunc
yading@10 519
/*
 * ff_simple_idct_armv5te (declared with export=1): in-place transform of
 * one block.
 *
 * In:  a1 = address of the block, handled here as 8 rows of 16 bytes
 * Out: the block is overwritten: idct_row_armv5te runs on each of the
 *      8 rows, then idct_col_armv5te on each of the 4 column pairs
 * v1-v7, fp are saved on entry and restored on return; a1 is left
 * pointing 12 bytes past the start of the block.
 */
yading@10 520 function ff_simple_idct_armv5te, export=1
yading@10 521 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} /* the helpers overwrite all of these */
yading@10 522
yading@10 523 bl idct_row_armv5te /* row 0 */
yading@10 524 add a1, a1, #16 /* next row is 16 bytes further on */
yading@10 525 bl idct_row_armv5te /* row 1 */
yading@10 526 add a1, a1, #16
yading@10 527 bl idct_row_armv5te /* row 2 */
yading@10 528 add a1, a1, #16
yading@10 529 bl idct_row_armv5te /* row 3 */
yading@10 530 add a1, a1, #16
yading@10 531 bl idct_row_armv5te /* row 4 */
yading@10 532 add a1, a1, #16
yading@10 533 bl idct_row_armv5te /* row 5 */
yading@10 534 add a1, a1, #16
yading@10 535 bl idct_row_armv5te /* row 6 */
yading@10 536 add a1, a1, #16
yading@10 537 bl idct_row_armv5te /* row 7 */
yading@10 538
yading@10 539 sub a1, a1, #(16*7) /* back to the start of the block */
yading@10 540
yading@10 541 bl idct_col_armv5te /* columns 0 and 1 */
yading@10 542 add a1, a1, #4 /* next column pair is 4 bytes further on */
yading@10 543 bl idct_col_armv5te /* columns 2 and 3 */
yading@10 544 add a1, a1, #4
yading@10 545 bl idct_col_armv5te /* columns 4 and 5 */
yading@10 546 add a1, a1, #4
yading@10 547 bl idct_col_armv5te /* columns 6 and 7 */
yading@10 548
yading@10 549 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore registers and return */
yading@10 550 endfunc
yading@10 551
/*
 * ff_simple_idct_add_armv5te (declared with export=1): transform one
 * block and add the clamped result to existing output bytes.
 *
 * In:  a1 = output address, a2 = byte distance between output rows
 *      (both are pushed and later read from the stack by
 *      idct_col_add_armv5te), a3 = address of the block (8 rows of
 *      16 bytes)
 * Out: the block is overwritten by the row pass; the output is updated
 *      by idct_col_add_armv5te, one column pair per call
 * v1-v7, fp are saved on entry and restored on return.
 */
yading@10 552 function ff_simple_idct_add_armv5te, export=1
yading@10 553 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* a1/a2 end up at the lowest addresses, where the column helper reads them */
yading@10 554
yading@10 555 mov a1, a3 /* the helpers take the block address in a1 */
yading@10 556
yading@10 557 bl idct_row_armv5te /* row 0 */
yading@10 558 add a1, a1, #16 /* next row is 16 bytes further on */
yading@10 559 bl idct_row_armv5te /* row 1 */
yading@10 560 add a1, a1, #16
yading@10 561 bl idct_row_armv5te /* row 2 */
yading@10 562 add a1, a1, #16
yading@10 563 bl idct_row_armv5te /* row 3 */
yading@10 564 add a1, a1, #16
yading@10 565 bl idct_row_armv5te /* row 4 */
yading@10 566 add a1, a1, #16
yading@10 567 bl idct_row_armv5te /* row 5 */
yading@10 568 add a1, a1, #16
yading@10 569 bl idct_row_armv5te /* row 6 */
yading@10 570 add a1, a1, #16
yading@10 571 bl idct_row_armv5te /* row 7 */
yading@10 572
yading@10 573 sub a1, a1, #(16*7) /* back to the start of the block */
yading@10 574
yading@10 575 bl idct_col_add_armv5te /* columns 0 and 1 */
yading@10 576 add a1, a1, #4 /* next column pair is 4 bytes further on */
yading@10 577 bl idct_col_add_armv5te /* columns 2 and 3 */
yading@10 578 add a1, a1, #4
yading@10 579 bl idct_col_add_armv5te /* columns 4 and 5 */
yading@10 580 add a1, a1, #4
yading@10 581 bl idct_col_add_armv5te /* columns 6 and 7 */
yading@10 582
yading@10 583 add sp, sp, #8 /* discard the stacked a1/a2 */
yading@10 584 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore registers and return */
yading@10 585 endfunc
yading@10 586
/*
 * ff_simple_idct_put_armv5te (declared with export=1): transform one
 * block and store the clamped result as bytes.
 *
 * In:  a1 = output address, a2 = byte distance between output rows
 *      (both are pushed and later read from the stack by
 *      idct_col_put_armv5te), a3 = address of the block (8 rows of
 *      16 bytes)
 * Out: the block is overwritten by the row pass; the output is written
 *      by idct_col_put_armv5te, one column pair per call
 * v1-v7, fp are saved on entry and restored on return.
 */
yading@10 587 function ff_simple_idct_put_armv5te, export=1
yading@10 588 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* a1/a2 end up at the lowest addresses, where the column helper reads them */
yading@10 589
yading@10 590 mov a1, a3 /* the helpers take the block address in a1 */
yading@10 591
yading@10 592 bl idct_row_armv5te /* row 0 */
yading@10 593 add a1, a1, #16 /* next row is 16 bytes further on */
yading@10 594 bl idct_row_armv5te /* row 1 */
yading@10 595 add a1, a1, #16
yading@10 596 bl idct_row_armv5te /* row 2 */
yading@10 597 add a1, a1, #16
yading@10 598 bl idct_row_armv5te /* row 3 */
yading@10 599 add a1, a1, #16
yading@10 600 bl idct_row_armv5te /* row 4 */
yading@10 601 add a1, a1, #16
yading@10 602 bl idct_row_armv5te /* row 5 */
yading@10 603 add a1, a1, #16
yading@10 604 bl idct_row_armv5te /* row 6 */
yading@10 605 add a1, a1, #16
yading@10 606 bl idct_row_armv5te /* row 7 */
yading@10 607
yading@10 608 sub a1, a1, #(16*7) /* back to the start of the block */
yading@10 609
yading@10 610 bl idct_col_put_armv5te /* columns 0 and 1 */
yading@10 611 add a1, a1, #4 /* next column pair is 4 bytes further on */
yading@10 612 bl idct_col_put_armv5te /* columns 2 and 3 */
yading@10 613 add a1, a1, #4
yading@10 614 bl idct_col_put_armv5te /* columns 4 and 5 */
yading@10 615 add a1, a1, #4
yading@10 616 bl idct_col_put_armv5te /* columns 6 and 7 */
yading@10 617
yading@10 618 add sp, sp, #8 /* discard the stacked a1/a2 */
yading@10 619 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore registers and return */
yading@10 620 endfunc