annotate ffmpeg/libavcodec/arm/hpeldsp_arm.S @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 @
yading@10 2 @ ARMv4 optimized DSP utils
yading@10 3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
yading@10 4 @
yading@10 5 @ This file is part of FFmpeg.
yading@10 6 @
yading@10 7 @ FFmpeg is free software; you can redistribute it and/or
yading@10 8 @ modify it under the terms of the GNU Lesser General Public
yading@10 9 @ License as published by the Free Software Foundation; either
yading@10 10 @ version 2.1 of the License, or (at your option) any later version.
yading@10 11 @
yading@10 12 @ FFmpeg is distributed in the hope that it will be useful,
yading@10 13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 15 @ Lesser General Public License for more details.
yading@10 16 @
yading@10 17 @ You should have received a copy of the GNU Lesser General Public
yading@10 18 @ License along with FFmpeg; if not, write to the Free Software
yading@10 19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 20 @
yading@10 21
yading@10 22 #include "config.h"
yading@10 23 #include "libavutil/arm/asm.S"
yading@10 24
yading@10 25 #if !HAVE_ARMV5TE_EXTERNAL /* Without ARMv5TE support (flag presumably comes from config.h - confirm), pld is redefined to '@', the comment character used throughout this file, so every "pld [rN]" line turns into a comment. */
yading@10 26 #define pld @
yading@10 27 #endif
yading@10 28
yading@10 29 .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 @ Rd0-Rd3 = the 16 bytes that start shift bytes into the 20-byte window Rn0-Rn4 (low byte of a word = lowest address); invoked with shift = 1, 2 or 3 in this file
yading@10 30 mov \Rd0, \Rn0, lsr #(\shift * 8) @ discard the low shift bytes of each source word
yading@10 31 mov \Rd1, \Rn1, lsr #(\shift * 8)
yading@10 32 mov \Rd2, \Rn2, lsr #(\shift * 8)
yading@10 33 mov \Rd3, \Rn3, lsr #(\shift * 8) @ every Rd is written before the orr lines run, so no Rd may double as Rn1-Rn4
yading@10 34 orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) @ the vacated top bytes are filled from the low end of the following word
yading@10 35 orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
yading@10 36 orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
yading@10 37 orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
yading@10 38 .endm
yading@10 39 .macro ALIGN_DWORD shift, R0, R1, R2 @ in-place variant: R0:R1 = the 8 bytes that start shift bytes into the 12-byte window R0:R1:R2; R2 is only read; invoked with shift = 1, 2 or 3 in this file
yading@10 40 mov \R0, \R0, lsr #(\shift * 8) @ drop the low shift bytes of the first word
yading@10 41 orr \R0, \R0, \R1, lsl #(32 - \shift * 8) @ refill from R1 while R1 still holds its original value
yading@10 42 mov \R1, \R1, lsr #(\shift * 8) @ only now is R1 overwritten
yading@10 43 orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
yading@10 44 .endm
yading@10 45 .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 @ Rdst0:Rdst1 = the 8 bytes that start shift bytes into the 12-byte window Rsrc0:Rsrc1:Rsrc2; invoked with shift = 1, 2 or 3 in this file
yading@10 46 mov \Rdst0, \Rsrc0, lsr #(\shift * 8) @ last read of Rsrc0, so Rdst1 may reuse that register (RND_XY2_IT does for align 3)
yading@10 47 mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
yading@10 48 orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) @ Rsrc1 and Rsrc2 are read after both destinations were written, so they must be distinct from Rdst0/Rdst1
yading@10 49 orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
yading@10 50 .endm
yading@10 51
yading@10 52 .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
yading@10 53 @ Per-byte average, rounded up, of two register pairs: Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
yading@10 54 @ Rmask must hold 0xFEFEFEFE (= ~0x01010101) so the shift cannot carry a bit into the neighbouring byte lane
yading@10 55 @ Rn0/Rn1 are destroyed (left holding Rn | Rm); Rm0/Rm1 and Rmask are only read
yading@10 56 eor \Rd0, \Rn0, \Rm0 @ bits in which the two inputs differ
yading@10 57 eor \Rd1, \Rn1, \Rm1
yading@10 58 orr \Rn0, \Rn0, \Rm0 @ overwrites Rn, which is why the caller loses it
yading@10 59 orr \Rn1, \Rn1, \Rm1
yading@10 60 and \Rd0, \Rd0, \Rmask @ clear bit 0 of every byte before the shift
yading@10 61 and \Rd1, \Rd1, \Rmask
yading@10 62 sub \Rd0, \Rn0, \Rd0, lsr #1 @ (a | b) - ((a ^ b) >> 1) per byte
yading@10 63 sub \Rd1, \Rn1, \Rd1, lsr #1
yading@10 64 .endm
yading@10 65
yading@10 66 .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
yading@10 67 @ Per-byte average, rounded down, of two register pairs: Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
yading@10 68 @ Rmask must hold 0xFEFEFEFE (= ~0x01010101) so the shift cannot carry a bit into the neighbouring byte lane
yading@10 69 @ Rn0/Rn1 are destroyed (left holding Rn & Rm); Rm0/Rm1 and Rmask are only read
yading@10 70 eor \Rd0, \Rn0, \Rm0 @ bits in which the two inputs differ
yading@10 71 eor \Rd1, \Rn1, \Rm1
yading@10 72 and \Rn0, \Rn0, \Rm0 @ overwrites Rn, which is why the caller loses it
yading@10 73 and \Rn1, \Rn1, \Rm1
yading@10 74 and \Rd0, \Rd0, \Rmask @ clear bit 0 of every byte before the shift
yading@10 75 and \Rd1, \Rd1, \Rmask
yading@10 76 add \Rd0, \Rn0, \Rd0, lsr #1 @ (a & b) + ((a ^ b) >> 1) per byte
yading@10 77 add \Rd1, \Rn1, \Rd1, lsr #1
yading@10 78 .endm
yading@10 79
yading@10 80 .macro JMP_ALIGN tmp, reg @ word-align reg and branch on its former low two bits: 0 -> 1f, 1 -> 2f, 2 -> 3f, 3 -> 4f; clobbers tmp and the flags; the caller supplies the four numeric labels
yading@10 81 ands \tmp, \reg, #3 @ tmp = byte offset inside the word, Z set when reg is already aligned
yading@10 82 bic \reg, \reg, #3 @ round reg down to a word boundary (no s suffix, flags stay as set above)
yading@10 83 beq 1f
yading@10 84 subs \tmp, \tmp, #1 @ Z set when the offset was 1
yading@10 85 beq 2f
yading@10 86 subs \tmp, \tmp, #1 @ Z set when the offset was 2
yading@10 87 beq 3f
yading@10 88 b 4f @ only offset 3 remains
yading@10 89 .endm
yading@10 90
yading@10 91 @ ----------------------------------------------------------------
yading@10 92 .align 5
yading@10 93 function ff_put_pixels16_arm, export=1
yading@10 94 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 95 @ block = word aligned, pixels = unaligned; copies h rows of 16 bytes, one row per loop pass (the count is tested after the copy, so h must be >= 1)
yading@10 96 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 97 push {r4-r11, lr}
yading@10 98 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the loop below that matches pixels & 3 (r5 is scratch)
yading@10 99 1: @ pixels & 3 == 0: plain word copy
yading@10 100 ldm r1, {r4-r7} @ one 16-byte row
yading@10 101 add r1, r1, r2 @ next source row
yading@10 102 stm r0, {r4-r7}
yading@10 103 pld [r1]
yading@10 104 subs r3, r3, #1 @ one row done; the add below leaves these flags intact for bne
yading@10 105 add r0, r0, r2 @ next destination row
yading@10 106 bne 1b
yading@10 107 pop {r4-r11, pc} @ restore registers and return
yading@10 108 .align 5
yading@10 109 2: @ pixels & 3 == 1
yading@10 110 ldm r1, {r4-r8} @ five aligned words cover the 16 wanted bytes
yading@10 111 add r1, r1, r2
yading@10 112 ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = bytes 1..16 of that window
yading@10 113 pld [r1]
yading@10 114 subs r3, r3, #1
yading@10 115 stm r0, {r9-r12}
yading@10 116 add r0, r0, r2
yading@10 117 bne 2b
yading@10 118 pop {r4-r11, pc}
yading@10 119 .align 5
yading@10 120 3: @ pixels & 3 == 2
yading@10 121 ldm r1, {r4-r8}
yading@10 122 add r1, r1, r2
yading@10 123 ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = bytes 2..17 of the window
yading@10 124 pld [r1]
yading@10 125 subs r3, r3, #1
yading@10 126 stm r0, {r9-r12}
yading@10 127 add r0, r0, r2
yading@10 128 bne 3b
yading@10 129 pop {r4-r11, pc}
yading@10 130 .align 5
yading@10 131 4: @ pixels & 3 == 3
yading@10 132 ldm r1, {r4-r8}
yading@10 133 add r1, r1, r2
yading@10 134 ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = bytes 3..18 of the window
yading@10 135 pld [r1]
yading@10 136 subs r3, r3, #1
yading@10 137 stm r0, {r9-r12}
yading@10 138 add r0, r0, r2
yading@10 139 bne 4b
yading@10 140 pop {r4-r11,pc}
yading@10 141 endfunc
yading@10 142
yading@10 143 @ ----------------------------------------------------------------
yading@10 144 .align 5
yading@10 145 function ff_put_pixels8_arm, export=1
yading@10 146 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 147 @ block = word aligned, pixels = unaligned; copies h rows of 8 bytes, one row per loop pass (the count is tested after the copy, so h must be >= 1)
yading@10 148 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 149 push {r4-r5,lr} @ r12 is also used below but is not saved by this function
yading@10 150 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the loop below that matches pixels & 3 (r5 is scratch)
yading@10 151 1: @ pixels & 3 == 0: plain word copy
yading@10 152 ldm r1, {r4-r5} @ one 8-byte row
yading@10 153 add r1, r1, r2 @ next source row
yading@10 154 subs r3, r3, #1 @ one row done; nothing before bne changes the flags again
yading@10 155 pld [r1]
yading@10 156 stm r0, {r4-r5}
yading@10 157 add r0, r0, r2 @ next destination row
yading@10 158 bne 1b
yading@10 159 pop {r4-r5,pc} @ restore registers and return
yading@10 160 .align 5
yading@10 161 2: @ pixels & 3 == 1
yading@10 162 ldm r1, {r4-r5, r12} @ three aligned words cover the 8 wanted bytes
yading@10 163 add r1, r1, r2
yading@10 164 ALIGN_DWORD 1, r4, r5, r12 @ r4:r5 = bytes 1..8 of that window
yading@10 165 pld [r1]
yading@10 166 subs r3, r3, #1
yading@10 167 stm r0, {r4-r5}
yading@10 168 add r0, r0, r2
yading@10 169 bne 2b
yading@10 170 pop {r4-r5,pc}
yading@10 171 .align 5
yading@10 172 3: @ pixels & 3 == 2
yading@10 173 ldm r1, {r4-r5, r12}
yading@10 174 add r1, r1, r2
yading@10 175 ALIGN_DWORD 2, r4, r5, r12 @ r4:r5 = bytes 2..9 of the window
yading@10 176 pld [r1]
yading@10 177 subs r3, r3, #1
yading@10 178 stm r0, {r4-r5}
yading@10 179 add r0, r0, r2
yading@10 180 bne 3b
yading@10 181 pop {r4-r5,pc}
yading@10 182 .align 5
yading@10 183 4: @ pixels & 3 == 3
yading@10 184 ldm r1, {r4-r5, r12}
yading@10 185 add r1, r1, r2
yading@10 186 ALIGN_DWORD 3, r4, r5, r12 @ r4:r5 = bytes 3..10 of the window
yading@10 187 pld [r1]
yading@10 188 subs r3, r3, #1
yading@10 189 stm r0, {r4-r5}
yading@10 190 add r0, r0, r2
yading@10 191 bne 4b
yading@10 192 pop {r4-r5,pc}
yading@10 193 endfunc
yading@10 194
yading@10 195 @ ----------------------------------------------------------------
yading@10 196 .align 5
yading@10 197 function ff_put_pixels8_x2_arm, export=1
yading@10 198 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 199 @ block = word aligned, pixels = unaligned; each of the 8 output bytes per row is the rounded-up average of pixels[x] and pixels[x + 1]; h must be >= 1
yading@10 200 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 201 push {r4-r10,lr}
yading@10 202 ldr r12, =0xfefefefe @ lane mask expected by RND_AVG32
yading@10 203 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the loop below that matches pixels & 3 (r5 is scratch)
yading@10 204 1: @ pixels & 3 == 0
yading@10 205 ldm r1, {r4-r5, r10} @ r4:r5 = bytes 0..7, r10 supplies byte 8
yading@10 206 add r1, r1, r2 @ next source row
yading@10 207 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6:r7 = bytes 1..8, the right-hand neighbours
yading@10 208 pld [r1]
yading@10 209 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ r8:r9 = average; r4/r5 are destroyed
yading@10 210 subs r3, r3, #1 @ one row done; nothing before bne changes the flags again
yading@10 211 stm r0, {r8-r9}
yading@10 212 add r0, r0, r2 @ next destination row
yading@10 213 bne 1b
yading@10 214 pop {r4-r10,pc} @ restore registers and return
yading@10 215 .align 5
yading@10 216 2: @ pixels & 3 == 1
yading@10 217 ldm r1, {r4-r5, r10}
yading@10 218 add r1, r1, r2
yading@10 219 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6:r7 = window bytes 1..8 (the pixels themselves)
yading@10 220 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 @ r8:r9 = window bytes 2..9 (their right-hand neighbours)
yading@10 221 pld [r1]
yading@10 222 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ result goes to r4:r5, whose loaded words are no longer needed
yading@10 223 subs r3, r3, #1
yading@10 224 stm r0, {r4-r5}
yading@10 225 add r0, r0, r2
yading@10 226 bne 2b
yading@10 227 pop {r4-r10,pc}
yading@10 228 .align 5
yading@10 229 3: @ pixels & 3 == 2
yading@10 230 ldm r1, {r4-r5, r10}
yading@10 231 add r1, r1, r2
yading@10 232 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 @ r6:r7 = window bytes 2..9
yading@10 233 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 @ r8:r9 = window bytes 3..10
yading@10 234 pld [r1]
yading@10 235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12
yading@10 236 subs r3, r3, #1
yading@10 237 stm r0, {r4-r5}
yading@10 238 add r0, r0, r2
yading@10 239 bne 3b
yading@10 240 pop {r4-r10,pc}
yading@10 241 .align 5
yading@10 242 4: @ pixels & 3 == 3
yading@10 243 ldm r1, {r4-r5, r10}
yading@10 244 add r1, r1, r2
yading@10 245 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 @ r6:r7 = window bytes 3..10
yading@10 246 pld [r1]
yading@10 247 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ the neighbours (window bytes 4..11) are simply the unshifted words r5:r10
yading@10 248 subs r3, r3, #1
yading@10 249 stm r0, {r8-r9}
yading@10 250 add r0, r0, r2
yading@10 251 bne 4b
yading@10 252 pop {r4-r10,pc}
yading@10 253 endfunc
yading@10 254
yading@10 255 .align 5
yading@10 256 function ff_put_no_rnd_pixels8_x2_arm, export=1
yading@10 257 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 258 @ block = word aligned, pixels = unaligned; each of the 8 output bytes per row is the rounded-down average of pixels[x] and pixels[x + 1]; h must be >= 1
yading@10 259 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 260 push {r4-r10,lr}
yading@10 261 ldr r12, =0xfefefefe @ lane mask expected by NO_RND_AVG32
yading@10 262 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the loop below that matches pixels & 3 (r5 is scratch)
yading@10 263 1: @ pixels & 3 == 0
yading@10 264 ldm r1, {r4-r5, r10} @ r4:r5 = bytes 0..7, r10 supplies byte 8
yading@10 265 add r1, r1, r2 @ next source row
yading@10 266 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6:r7 = bytes 1..8, the right-hand neighbours
yading@10 267 pld [r1]
yading@10 268 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ r8:r9 = average; r4/r5 are destroyed
yading@10 269 subs r3, r3, #1 @ one row done; nothing before bne changes the flags again
yading@10 270 stm r0, {r8-r9}
yading@10 271 add r0, r0, r2 @ next destination row
yading@10 272 bne 1b
yading@10 273 pop {r4-r10,pc} @ restore registers and return
yading@10 274 .align 5
yading@10 275 2: @ pixels & 3 == 1
yading@10 276 ldm r1, {r4-r5, r10}
yading@10 277 add r1, r1, r2
yading@10 278 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6:r7 = window bytes 1..8 (the pixels themselves)
yading@10 279 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 @ r8:r9 = window bytes 2..9 (their right-hand neighbours)
yading@10 280 pld [r1]
yading@10 281 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ result goes to r4:r5, whose loaded words are no longer needed
yading@10 282 subs r3, r3, #1
yading@10 283 stm r0, {r4-r5}
yading@10 284 add r0, r0, r2
yading@10 285 bne 2b
yading@10 286 pop {r4-r10,pc}
yading@10 287 .align 5
yading@10 288 3: @ pixels & 3 == 2
yading@10 289 ldm r1, {r4-r5, r10}
yading@10 290 add r1, r1, r2
yading@10 291 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 @ r6:r7 = window bytes 2..9
yading@10 292 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 @ r8:r9 = window bytes 3..10
yading@10 293 pld [r1]
yading@10 294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
yading@10 295 subs r3, r3, #1
yading@10 296 stm r0, {r4-r5}
yading@10 297 add r0, r0, r2
yading@10 298 bne 3b
yading@10 299 pop {r4-r10,pc}
yading@10 300 .align 5
yading@10 301 4: @ pixels & 3 == 3
yading@10 302 ldm r1, {r4-r5, r10}
yading@10 303 add r1, r1, r2
yading@10 304 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 @ r6:r7 = window bytes 3..10
yading@10 305 pld [r1]
yading@10 306 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ the neighbours (window bytes 4..11) are simply the unshifted words r5:r10
yading@10 307 subs r3, r3, #1
yading@10 308 stm r0, {r8-r9}
yading@10 309 add r0, r0, r2
yading@10 310 bne 4b
yading@10 311 pop {r4-r10,pc}
yading@10 312 endfunc
yading@10 313
yading@10 314
yading@10 315 @ ----------------------------------------------------------------
yading@10 316 .align 5
yading@10 317 function ff_put_pixels8_y2_arm, export=1
yading@10 318 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 319 @ block = word aligned, pixels = unaligned; each 8-byte output row is the rounded-up average of a source row and the row below it; two output rows are produced per loop pass
yading@10 320 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 321 push {r4-r11,lr}
yading@10 322 mov r3, r3, lsr #1 @ r3 = h / 2 loop passes; h has to be even and >= 2 for exactly h rows to be written (the count is only tested after a pass)
yading@10 323 ldr r12, =0xfefefefe @ lane mask expected by RND_AVG32
yading@10 324 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the variant below that matches pixels & 3 (r5 is scratch)
yading@10 325 1: @ pixels & 3 == 0
yading@10 326 ldm r1, {r4-r5} @ r4:r5 = first source row
yading@10 327 add r1, r1, r2
yading@10 328 6: ldm r1, {r6-r7} @ r6:r7 = next source row
yading@10 329 add r1, r1, r2
yading@10 330 pld [r1]
yading@10 331 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4:r5, keeps r6:r7 for the second average
yading@10 332 ldm r1, {r4-r5} @ r4:r5 = the row after that
yading@10 333 add r1, r1, r2
yading@10 334 stm r0, {r8-r9} @ first output row of this pass
yading@10 335 add r0, r0, r2
yading@10 336 pld [r1]
yading@10 337 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ destroys r6:r7, keeps r4:r5 as the top row of the next pass
yading@10 338 subs r3, r3, #1 @ nothing before bne changes the flags again
yading@10 339 stm r0, {r8-r9} @ second output row of this pass
yading@10 340 add r0, r0, r2
yading@10 341 bne 6b
yading@10 342 pop {r4-r11,pc} @ restore registers and return
yading@10 343 .align 5
yading@10 344 2: @ pixels & 3 == 1
yading@10 345 ldm r1, {r4-r6} @ three aligned words cover the 8 wanted bytes
yading@10 346 add r1, r1, r2
yading@10 347 pld [r1]
yading@10 348 ALIGN_DWORD 1, r4, r5, r6 @ r4:r5 = first source row
yading@10 349 6: ldm r1, {r7-r9}
yading@10 350 add r1, r1, r2
yading@10 351 pld [r1]
yading@10 352 ALIGN_DWORD 1, r7, r8, r9 @ r7:r8 = next source row
yading@10 353 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4:r5, keeps r7:r8
yading@10 354 stm r0, {r10-r11}
yading@10 355 add r0, r0, r2
yading@10 356 ldm r1, {r4-r6}
yading@10 357 add r1, r1, r2
yading@10 358 pld [r1]
yading@10 359 ALIGN_DWORD 1, r4, r5, r6 @ r4:r5 = the row after that, reused by the next pass
yading@10 360 subs r3, r3, #1 @ nothing before bne changes the flags again
yading@10 361 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ destroys r7:r8, keeps r4:r5
yading@10 362 stm r0, {r10-r11}
yading@10 363 add r0, r0, r2
yading@10 364 bne 6b
yading@10 365 pop {r4-r11,pc}
yading@10 366 .align 5
yading@10 367 3: @ pixels & 3 == 2, same structure as the previous variant with a 2-byte shift
yading@10 368 ldm r1, {r4-r6}
yading@10 369 add r1, r1, r2
yading@10 370 pld [r1]
yading@10 371 ALIGN_DWORD 2, r4, r5, r6
yading@10 372 6: ldm r1, {r7-r9}
yading@10 373 add r1, r1, r2
yading@10 374 pld [r1]
yading@10 375 ALIGN_DWORD 2, r7, r8, r9
yading@10 376 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
yading@10 377 stm r0, {r10-r11}
yading@10 378 add r0, r0, r2
yading@10 379 ldm r1, {r4-r6}
yading@10 380 add r1, r1, r2
yading@10 381 pld [r1]
yading@10 382 ALIGN_DWORD 2, r4, r5, r6
yading@10 383 subs r3, r3, #1
yading@10 384 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
yading@10 385 stm r0, {r10-r11}
yading@10 386 add r0, r0, r2
yading@10 387 bne 6b
yading@10 388 pop {r4-r11,pc}
yading@10 389 .align 5
yading@10 390 4: @ pixels & 3 == 3, same structure with a 3-byte shift
yading@10 391 ldm r1, {r4-r6}
yading@10 392 add r1, r1, r2
yading@10 393 pld [r1]
yading@10 394 ALIGN_DWORD 3, r4, r5, r6
yading@10 395 6: ldm r1, {r7-r9}
yading@10 396 add r1, r1, r2
yading@10 397 pld [r1]
yading@10 398 ALIGN_DWORD 3, r7, r8, r9
yading@10 399 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
yading@10 400 stm r0, {r10-r11}
yading@10 401 add r0, r0, r2
yading@10 402 ldm r1, {r4-r6}
yading@10 403 add r1, r1, r2
yading@10 404 pld [r1]
yading@10 405 ALIGN_DWORD 3, r4, r5, r6
yading@10 406 subs r3, r3, #1
yading@10 407 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
yading@10 408 stm r0, {r10-r11}
yading@10 409 add r0, r0, r2
yading@10 410 bne 6b
yading@10 411 pop {r4-r11,pc}
yading@10 412 endfunc
yading@10 413
yading@10 414 .align 5
yading@10 415 function ff_put_no_rnd_pixels8_y2_arm, export=1
yading@10 416 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 417 @ block = word aligned, pixels = unaligned; each 8-byte output row is the rounded-down average of a source row and the row below it; two output rows are produced per loop pass
yading@10 418 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 419 push {r4-r11,lr}
yading@10 420 mov r3, r3, lsr #1 @ r3 = h / 2 loop passes; h has to be even and >= 2 for exactly h rows to be written (the count is only tested after a pass)
yading@10 421 ldr r12, =0xfefefefe @ lane mask expected by NO_RND_AVG32
yading@10 422 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the variant below that matches pixels & 3 (r5 is scratch)
yading@10 423 1: @ pixels & 3 == 0
yading@10 424 ldm r1, {r4-r5} @ r4:r5 = first source row
yading@10 425 add r1, r1, r2
yading@10 426 6: ldm r1, {r6-r7} @ r6:r7 = next source row
yading@10 427 add r1, r1, r2
yading@10 428 pld [r1]
yading@10 429 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4:r5, keeps r6:r7 for the second average
yading@10 430 ldm r1, {r4-r5} @ r4:r5 = the row after that
yading@10 431 add r1, r1, r2
yading@10 432 stm r0, {r8-r9} @ first output row of this pass
yading@10 433 add r0, r0, r2
yading@10 434 pld [r1]
yading@10 435 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ destroys r6:r7, keeps r4:r5 as the top row of the next pass
yading@10 436 subs r3, r3, #1 @ nothing before bne changes the flags again
yading@10 437 stm r0, {r8-r9} @ second output row of this pass
yading@10 438 add r0, r0, r2
yading@10 439 bne 6b
yading@10 440 pop {r4-r11,pc} @ restore registers and return
yading@10 441 .align 5
yading@10 442 2: @ pixels & 3 == 1
yading@10 443 ldm r1, {r4-r6} @ three aligned words cover the 8 wanted bytes
yading@10 444 add r1, r1, r2
yading@10 445 pld [r1]
yading@10 446 ALIGN_DWORD 1, r4, r5, r6 @ r4:r5 = first source row
yading@10 447 6: ldm r1, {r7-r9}
yading@10 448 add r1, r1, r2
yading@10 449 pld [r1]
yading@10 450 ALIGN_DWORD 1, r7, r8, r9 @ r7:r8 = next source row
yading@10 451 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4:r5, keeps r7:r8
yading@10 452 stm r0, {r10-r11}
yading@10 453 add r0, r0, r2
yading@10 454 ldm r1, {r4-r6}
yading@10 455 add r1, r1, r2
yading@10 456 pld [r1]
yading@10 457 ALIGN_DWORD 1, r4, r5, r6 @ r4:r5 = the row after that, reused by the next pass
yading@10 458 subs r3, r3, #1 @ nothing before bne changes the flags again
yading@10 459 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ destroys r7:r8, keeps r4:r5
yading@10 460 stm r0, {r10-r11}
yading@10 461 add r0, r0, r2
yading@10 462 bne 6b
yading@10 463 pop {r4-r11,pc}
yading@10 464 .align 5
yading@10 465 3: @ pixels & 3 == 2, same structure as the previous variant with a 2-byte shift
yading@10 466 ldm r1, {r4-r6}
yading@10 467 add r1, r1, r2
yading@10 468 pld [r1]
yading@10 469 ALIGN_DWORD 2, r4, r5, r6
yading@10 470 6: ldm r1, {r7-r9}
yading@10 471 add r1, r1, r2
yading@10 472 pld [r1]
yading@10 473 ALIGN_DWORD 2, r7, r8, r9
yading@10 474 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
yading@10 475 stm r0, {r10-r11}
yading@10 476 add r0, r0, r2
yading@10 477 ldm r1, {r4-r6}
yading@10 478 add r1, r1, r2
yading@10 479 pld [r1]
yading@10 480 ALIGN_DWORD 2, r4, r5, r6
yading@10 481 subs r3, r3, #1
yading@10 482 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
yading@10 483 stm r0, {r10-r11}
yading@10 484 add r0, r0, r2
yading@10 485 bne 6b
yading@10 486 pop {r4-r11,pc}
yading@10 487 .align 5
yading@10 488 4: @ pixels & 3 == 3, same structure with a 3-byte shift
yading@10 489 ldm r1, {r4-r6}
yading@10 490 add r1, r1, r2
yading@10 491 pld [r1]
yading@10 492 ALIGN_DWORD 3, r4, r5, r6
yading@10 493 6: ldm r1, {r7-r9}
yading@10 494 add r1, r1, r2
yading@10 495 pld [r1]
yading@10 496 ALIGN_DWORD 3, r7, r8, r9
yading@10 497 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
yading@10 498 stm r0, {r10-r11}
yading@10 499 add r0, r0, r2
yading@10 500 ldm r1, {r4-r6}
yading@10 501 add r1, r1, r2
yading@10 502 pld [r1]
yading@10 503 ALIGN_DWORD 3, r4, r5, r6
yading@10 504 subs r3, r3, #1
yading@10 505 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
yading@10 506 stm r0, {r10-r11}
yading@10 507 add r0, r0, r2
yading@10 508 bne 6b
yading@10 509 pop {r4-r11,pc}
yading@10 510 endfunc
yading@10 511
yading@10 512 .ltorg @ place the literal pool here, i.e. the constants behind the "ldr rX, =value" loads of the functions above
yading@10 513
yading@10 514 @ ----------------------------------------------------------------
yading@10 515 .macro RND_XY2_IT align, rnd @ one source row -> partial sums for the 2x2 average; align = original pixels & 3 (r1 is already word aligned), rnd = lsl or lsr; also advances r1 by r2 and ends with r3 -= 1 (flags set); clobbers r4-r12 and r14
yading@10 516 @ l1 = (a & 0x03030303) + (b & 0x03030303), plus a rounding term on passes where r3 is even -> r8:r9
yading@10 517 @ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) -> r10:r11; a = r4:r5 and b = r6:r7 hold the row at two byte offsets that are one apart
yading@10 518 .if \align == 0
yading@10 519 ldm r1, {r6-r8} @ r6:r7 is already the row at offset 0
yading@10 520 .elseif \align == 3
yading@10 521 ldm r1, {r5-r7} @ r6:r7 is already the row at offset 4 of the window
yading@10 522 .else
yading@10 523 ldm r1, {r8-r10} @ both operands have to be extracted by shifting
yading@10 524 .endif
yading@10 525 add r1, r1, r2 @ next source row
yading@10 526 pld [r1]
yading@10 527 .if \align == 0
yading@10 528 ALIGN_DWORD_D 1, r4, r5, r6, r7, r8 @ r4:r5 = offset 1
yading@10 529 .elseif \align == 1
yading@10 530 ALIGN_DWORD_D 1, r4, r5, r8, r9, r10 @ r4:r5 = offset 1
yading@10 531 ALIGN_DWORD_D 2, r6, r7, r8, r9, r10 @ r6:r7 = offset 2
yading@10 532 .elseif \align == 2
yading@10 533 ALIGN_DWORD_D 2, r4, r5, r8, r9, r10 @ r4:r5 = offset 2
yading@10 534 ALIGN_DWORD_D 3, r6, r7, r8, r9, r10 @ r6:r7 = offset 3
yading@10 535 .elseif \align == 3
yading@10 536 ALIGN_DWORD_D 3, r4, r5, r5, r6, r7 @ r4:r5 = offset 3; r5 is both source and destination, which the macro's instruction order permits
yading@10 537 .endif
yading@10 538 ldr r14, =0x03030303 @ mask for the low two bits of every byte
yading@10 539 tst r3, #1 @ Z set when r3 is even; these flags are used by both conditional groups below
yading@10 540 and r8, r4, r14
yading@10 541 and r9, r5, r14
yading@10 542 and r10, r6, r14
yading@10 543 and r11, r7, r14
yading@10 544 it eq
yading@10 545 andeq r14, r14, r14, \rnd #1 @ rounding term: 0x02020202 when rnd is lsl, 0x01010101 when rnd is lsr
yading@10 546 add r8, r8, r10 @ r8:r9 = low-bit sums of a and b
yading@10 547 add r9, r9, r11
yading@10 548 ldr r12, =0xfcfcfcfc >> 2 @ 0x3f3f3f3f, applied after the shift instead of 0xfcfcfcfc before it
yading@10 549 itt eq
yading@10 550 addeq r8, r8, r14 @ add the rounding term on even r3 only
yading@10 551 addeq r9, r9, r14
yading@10 552 and r4, r12, r4, lsr #2 @ upper six bits of every byte, moved down by two
yading@10 553 and r5, r12, r5, lsr #2
yading@10 554 and r6, r12, r6, lsr #2
yading@10 555 and r7, r12, r7, lsr #2
yading@10 556 add r10, r4, r6 @ r10:r11 = high-bit sums of a and b
yading@10 557 add r11, r5, r7
yading@10 558 subs r3, r3, #1 @ row counter; RND_XY2_EXPAND branches on these flags
yading@10 559 .endm
yading@10 560
yading@10 561 .macro RND_XY2_EXPAND align, rnd @ complete loop and return for one source alignment of the xy2 functions: every pass combines the partial sums of two consecutive rows into one 8-byte output row
yading@10 562 RND_XY2_IT \align, \rnd @ partial sums of the first source row
yading@10 563 6: push {r8-r11} @ keep the previous row's l1 (r8:r9) and h1 (r10:r11) across the next RND_XY2_IT
yading@10 564 RND_XY2_IT \align, \rnd @ partial sums of the following row, again in r8-r11
yading@10 565 pop {r4-r7} @ previous row: l1 -> r4:r5, h1 -> r6:r7
yading@10 566 add r4, r4, r8 @ low-bit sums of both rows; r3 parity alternates between the two, so exactly one of them carries the rounding term
yading@10 567 add r5, r5, r9
yading@10 568 ldr r14, =0x0f0f0f0f @ keeps each shifted low-bit sum inside its own byte
yading@10 569 add r6, r6, r10 @ high-bit sums of both rows
yading@10 570 add r7, r7, r11
yading@10 571 and r4, r14, r4, lsr #2 @ low-bit sum divided by four, per byte
yading@10 572 and r5, r14, r5, lsr #2
yading@10 573 add r4, r4, r6 @ plus the high-bit sum = finished output bytes
yading@10 574 add r5, r5, r7
yading@10 575 stm r0, {r4-r5}
yading@10 576 add r0, r0, r2 @ next destination row
yading@10 577 bge 6b @ flags still come from the subs that ends RND_XY2_IT; nothing after it sets them
yading@10 578 pop {r4-r11,pc} @ matches the push {r4-r11,lr} executed by the function that expands this macro
yading@10 579 .endm
yading@10 580
yading@10 581 .align 5
yading@10 582 function ff_put_pixels8_xy2_arm, export=1
yading@10 583 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 584 @ block = word aligned, pixels = unaligned; each of the 8 output bytes per row is (sum of the 2x2 source pixels at x, x + 1 on this row and the next + 2) >> 2
yading@10 585 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 586 push {r4-r11,lr} @ R14 is also called LR; it must be saved because RND_XY2_IT uses r14 as scratch
yading@10 587 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the expansion below that matches pixels & 3 (r5 is scratch)
yading@10 588 1: RND_XY2_EXPAND 0, lsl @ pixels & 3 == 0; lsl selects the 0x02020202 rounding term; every expansion ends with its own return
yading@10 589 .align 5
yading@10 590 2: RND_XY2_EXPAND 1, lsl @ pixels & 3 == 1
yading@10 591 .align 5
yading@10 592 3: RND_XY2_EXPAND 2, lsl @ pixels & 3 == 2
yading@10 593 .align 5
yading@10 594 4: RND_XY2_EXPAND 3, lsl @ pixels & 3 == 3
yading@10 595 endfunc
yading@10 596
yading@10 597 .align 5
yading@10 598 function ff_put_no_rnd_pixels8_xy2_arm, export=1
yading@10 599 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
yading@10 600 @ block = word aligned, pixels = unaligned; each of the 8 output bytes per row is (sum of the 2x2 source pixels at x, x + 1 on this row and the next + 1) >> 2
yading@10 601 pld [r1] @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
yading@10 602 push {r4-r11,lr} @ lr (r14) must be saved because RND_XY2_IT uses r14 as scratch
yading@10 603 JMP_ALIGN r5, r1 @ r1 &= ~3, then jump to the expansion below that matches pixels & 3 (r5 is scratch)
yading@10 604 1: RND_XY2_EXPAND 0, lsr @ pixels & 3 == 0; lsr selects the 0x01010101 rounding term; every expansion ends with its own return
yading@10 605 .align 5
yading@10 606 2: RND_XY2_EXPAND 1, lsr @ pixels & 3 == 1
yading@10 607 .align 5
yading@10 608 3: RND_XY2_EXPAND 2, lsr @ pixels & 3 == 2
yading@10 609 .align 5
yading@10 610 4: RND_XY2_EXPAND 3, lsr @ pixels & 3 == 3
yading@10 611 endfunc