annotate ffmpeg/libavcodec/arm/dsputil_armv6.S @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
yading@10 3 *
yading@10 4 * This file is part of FFmpeg.
yading@10 5 *
yading@10 6 * FFmpeg is free software; you can redistribute it and/or
yading@10 7 * modify it under the terms of the GNU Lesser General Public
yading@10 8 * License as published by the Free Software Foundation; either
yading@10 9 * version 2.1 of the License, or (at your option) any later version.
yading@10 10 *
yading@10 11 * FFmpeg is distributed in the hope that it will be useful,
yading@10 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 14 * Lesser General Public License for more details.
yading@10 15 *
yading@10 16 * You should have received a copy of the GNU Lesser General Public
yading@10 17 * License along with FFmpeg; if not, write to the Free Software
yading@10 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 19 */
yading@10 20
yading@10 21 #include "libavutil/arm/asm.S"
yading@10 22
/*
 * ff_add_pixels_clamped_armv6
 *
 * Runs 8 iterations (r3 counts down from 8). Each iteration:
 *   - loads four words (eight 16-bit values) from r0, advancing r0 by 16 bytes,
 *   - loads two words (eight bytes) from r1,
 *   - adds each byte, zero-extended, to the matching 16-bit value,
 *   - saturates every sum to the unsigned 8-bit range 0..255,
 *   - writes the eight result bytes back through r1 with strd_post, passing r2.
 *
 * Inputs as used by the code:
 *   r0 = pointer to the 16-bit values
 *   r1 = pointer to the bytes that are read and then overwritten
 *   r2 = offset given to pld and strd_post (presumably the row stride in bytes.
 *        strd_post is not an ARM mnemonic, presumably a macro from the included
 *        asm.S - TODO confirm its exact addressing behaviour)
 * Saves r4-r8 and lr, returns by popping the saved lr into pc.
 */
yading@10 23 function ff_add_pixels_clamped_armv6, export=1
yading@10 24 push {r4-r8,lr}
yading@10 25 mov r3, #8 /* r3 = iteration counter */
yading@10 26 1:
yading@10 27 ldm r0!, {r4,r5,r12,lr} /* eight 16-bit values, two per register */
yading@10 28 ldrd r6, r7, [r1] /* eight bytes, four per register */
yading@10 29 pkhbt r8, r4, r5, lsl #16 /* r8 = low halves of r4 and r5 */
yading@10 30 pkhtb r5, r5, r4, asr #16 /* r5 = high halves of r4 and r5 */
yading@10 31 pkhbt r4, r12, lr, lsl #16 /* r4 = low halves of r12 and lr */
yading@10 32 pkhtb lr, lr, r12, asr #16 /* lr = high halves of r12 and lr */
yading@10 33 pld [r1, r2] /* preload hint for the address r1 + r2 */
yading@10 34 uxtab16 r8, r8, r6 /* add byte lanes 0 and 2 of r6 */
yading@10 35 uxtab16 r5, r5, r6, ror #8 /* add byte lanes 1 and 3 of r6 */
yading@10 36 uxtab16 r4, r4, r7 /* add byte lanes 0 and 2 of r7 */
yading@10 37 uxtab16 lr, lr, r7, ror #8 /* add byte lanes 1 and 3 of r7 */
yading@10 38 usat16 r8, #8, r8 /* clamp both halfwords to 0..255 */
yading@10 39 usat16 r5, #8, r5
yading@10 40 usat16 r4, #8, r4
yading@10 41 usat16 lr, #8, lr
yading@10 42 orr r6, r8, r5, lsl #8 /* re-interleave the clamped bytes into one word */
yading@10 43 orr r7, r4, lr, lsl #8
yading@10 44 subs r3, r3, #1 /* flags set here are consumed by bgt below */
yading@10 45 strd_post r6, r7, r1, r2 /* store the 8 result bytes (presumably then r1 += r2) */
yading@10 46 bgt 1b
yading@10 47 pop {r4-r8,pc}
yading@10 48 endfunc
yading@10 49
/*
 * ff_get_pixels_armv6
 *
 * Runs 8 iterations (lr counts down from 8). Each iteration loads two words
 * (eight bytes) through r1 with ldrd_post, zero-extends every byte to 16 bits
 * and stores the eight halfwords as four words through r0, advancing r0 by
 * 16 bytes.
 *
 * Inputs as used by the code:
 *   r0 = destination pointer for the 16-bit values
 *   r1 = source pointer for the bytes
 *   r2 = offset given to pld and ldrd_post (presumably the row stride in bytes.
 *        ldrd_post is presumably a macro from the included asm.S - TODO confirm)
 * Saves r4-r8 and lr, returns by popping the saved lr into pc.
 */
yading@10 50 function ff_get_pixels_armv6, export=1
yading@10 51 pld [r1, r2]
yading@10 52 push {r4-r8, lr}
yading@10 53 mov lr, #8 /* lr = iteration counter */
yading@10 54 1:
yading@10 55 ldrd_post r4, r5, r1, r2 /* load 8 bytes (presumably then r1 += r2) */
yading@10 56 subs lr, lr, #1 /* flags are consumed by bgt at the end of the loop */
yading@10 57 uxtb16 r6, r4 /* r6 = byte lanes 0 and 2 of r4 as halfwords */
yading@10 58 uxtb16 r4, r4, ror #8 /* r4 = byte lanes 1 and 3 of r4 as halfwords */
yading@10 59 uxtb16 r12, r5 /* r12 = byte lanes 0 and 2 of r5 as halfwords */
yading@10 60 uxtb16 r8, r5, ror #8 /* r8 = byte lanes 1 and 3 of r5 as halfwords */
yading@10 61 pld [r1, r2]
yading@10 62 pkhbt r5, r6, r4, lsl #16 /* r5 = lanes 0 and 1 in order */
yading@10 63 pkhtb r6, r4, r6, asr #16 /* r6 = lanes 2 and 3 in order */
yading@10 64 pkhbt r7, r12, r8, lsl #16 /* r7 = lanes 4 and 5 in order */
yading@10 65 pkhtb r12, r8, r12, asr #16 /* r12 = lanes 6 and 7 in order */
yading@10 66 stm r0!, {r5,r6,r7,r12} /* write eight halfwords, r0 += 16 */
yading@10 67 bgt 1b
yading@10 68
yading@10 69 pop {r4-r8, pc}
yading@10 70 endfunc
yading@10 71
/*
 * ff_diff_pixels_armv6
 *
 * Runs 8 iterations (lr counts down from 8). Each iteration loads eight bytes
 * through r1 and eight bytes through r2, zero-extends them to 16 bits,
 * subtracts the r2 value from the r1 value lane by lane, and stores the eight
 * 16-bit differences through r0, advancing r0 by 16 bytes.
 *
 * Inputs as used by the code:
 *   r0 = destination pointer for the 16-bit differences
 *   r1 = pointer to the minuend bytes
 *   r2 = pointer to the subtrahend bytes
 *   r3 = offset given to pld and ldrd_post for both sources (presumably the
 *        row stride in bytes. ldrd_post is presumably a macro from the included
 *        asm.S - TODO confirm)
 * Saves r4-r9 and lr, returns by popping the saved lr into pc.
 */
yading@10 72 function ff_diff_pixels_armv6, export=1
yading@10 73 pld [r1, r3]
yading@10 74 pld [r2, r3]
yading@10 75 push {r4-r9, lr}
yading@10 76 mov lr, #8 /* lr = iteration counter */
yading@10 77 1:
yading@10 78 ldrd_post r4, r5, r1, r3 /* r4, r5 = eight bytes of the first source */
yading@10 79 ldrd_post r6, r7, r2, r3 /* r6, r7 = eight bytes of the second source */
yading@10 80 uxtb16 r8, r4 /* first source, lanes 0 and 2 */
yading@10 81 uxtb16 r4, r4, ror #8 /* first source, lanes 1 and 3 */
yading@10 82 uxtb16 r9, r6 /* second source, lanes 0 and 2 */
yading@10 83 uxtb16 r6, r6, ror #8 /* second source, lanes 1 and 3 */
yading@10 84 pld [r1, r3]
yading@10 85 ssub16 r9, r8, r9 /* differences for lanes 0 and 2 */
yading@10 86 ssub16 r6, r4, r6 /* differences for lanes 1 and 3 */
yading@10 87 uxtb16 r8, r5 /* first source, lanes 4 and 6 */
yading@10 88 uxtb16 r5, r5, ror #8 /* first source, lanes 5 and 7 */
yading@10 89 pld [r2, r3]
yading@10 90 pkhbt r4, r9, r6, lsl #16 /* r4 = differences 0 and 1 in order */
yading@10 91 pkhtb r6, r6, r9, asr #16 /* r6 = differences 2 and 3 in order */
yading@10 92 uxtb16 r9, r7 /* second source, lanes 4 and 6 */
yading@10 93 uxtb16 r7, r7, ror #8 /* second source, lanes 5 and 7 */
yading@10 94 ssub16 r9, r8, r9 /* differences for lanes 4 and 6 */
yading@10 95 ssub16 r5, r5, r7 /* differences for lanes 5 and 7 */
yading@10 96 subs lr, lr, #1 /* flags are consumed by bgt at the end of the loop */
yading@10 97 pkhbt r8, r9, r5, lsl #16 /* r8 = differences 4 and 5 in order */
yading@10 98 pkhtb r9, r5, r9, asr #16 /* r9 = differences 6 and 7 in order */
yading@10 99 stm r0!, {r4,r6,r8,r9} /* write eight halfwords, r0 += 16 */
yading@10 100 bgt 1b
yading@10 101
yading@10 102 pop {r4-r9, pc}
yading@10 103 endfunc
yading@10 104
/*
 * ff_pix_abs16_armv6
 *
 * Sum of absolute byte differences between two buffers, 16 bytes per row.
 *
 * Inputs as used by the code:
 *   r0      = overwritten on entry without being read
 *   r1, r2  = pointers to the two byte buffers
 *   r3      = value added to r1 and r2 after each row (row stride in bytes)
 *   [sp]    = row count, loaded from the top of the stack on entry (the first
 *             stack-passed argument under the standard ARM calling convention)
 * Returns in r0 the sum of the two partial accumulators r12 and lr.
 *
 * The loop exits only when the counter reaches exactly zero (beq after subs),
 * so the row count is expected to be at least 1.
 * The load of the next row sits after the exit test, so no row beyond the
 * last one is read.
 */
yading@10 105 function ff_pix_abs16_armv6, export=1
yading@10 106 ldr r0, [sp] /* r0 = row count, read before push moves sp */
yading@10 107 push {r4-r9, lr}
yading@10 108 mov r12, #0 /* accumulator for words 0 and 2 of each row */
yading@10 109 mov lr, #0 /* accumulator for words 1 and 3 of each row */
yading@10 110 ldm r1, {r4-r7} /* first 16-byte row from r1 */
yading@10 111 ldr r8, [r2] /* first word of the row from r2 */
yading@10 112 1:
yading@10 113 ldr r9, [r2, #4]
yading@10 114 pld [r1, r3]
yading@10 115 usada8 r12, r4, r8, r12 /* r12 += sum of abs byte differences of r4 and r8 */
yading@10 116 ldr r8, [r2, #8]
yading@10 117 pld [r2, r3]
yading@10 118 usada8 lr, r5, r9, lr
yading@10 119 ldr r9, [r2, #12]
yading@10 120 usada8 r12, r6, r8, r12
yading@10 121 subs r0, r0, #1 /* usada8 leaves the flags alone, beq below uses them */
yading@10 122 usada8 lr, r7, r9, lr
yading@10 123 beq 2f /* last row done */
yading@10 124 add r1, r1, r3 /* step both pointers to the next row and reload */
yading@10 125 ldm r1, {r4-r7}
yading@10 126 add r2, r2, r3
yading@10 127 ldr r8, [r2]
yading@10 128 b 1b
yading@10 129 2:
yading@10 130 add r0, r12, lr /* combine the two partial sums as the return value */
yading@10 131 pop {r4-r9, pc}
yading@10 132 endfunc
yading@10 133
/*
 * ff_pix_abs16_x2_armv6
 *
 * Sum of absolute byte differences, 16 bytes per row, between the r1 buffer
 * and a horizontally averaged version of the r2 buffer: each r2 byte is
 * averaged with the byte in the next higher byte lane, rounding up, before
 * the comparison.
 *
 * Inputs as used by the code:
 *   r0      = overwritten on entry without being read
 *   r1      = pointer to the buffer compared against
 *   r2      = pointer to the buffer that is averaged. 17 bytes are read per
 *             row: four words plus one byte at offset 16
 *   r3      = value added to r1 and r2 after each row (row stride in bytes)
 *   [sp]    = row count, loaded from the top of the stack on entry
 * Returns the accumulated sum in r0.
 *
 * The rounded average of two packed words a and b is formed as
 *   uhadd8(a, b) + ((a eor b) and 0x01010101)
 * that is, the truncating per-byte half-add plus the bit the halving dropped.
 *
 * The neighbour word is built as (w lsr 8) orr (next lsl 24), which pairs each
 * byte with the following memory byte only when words are loaded little-endian
 * - NOTE(review): confirm the target byte order.
 */
yading@10 134 function ff_pix_abs16_x2_armv6, export=1
yading@10 135 ldr r12, [sp] /* r12 = row count, read before push moves sp */
yading@10 136 push {r4-r11, lr}
yading@10 137 mov r0, #0 /* r0 = running sum */
yading@10 138 mov lr, #1
yading@10 139 orr lr, lr, lr, lsl #8
yading@10 140 orr lr, lr, lr, lsl #16 /* lr = 0x01010101, per-byte rounding mask */
yading@10 141 1:
yading@10 142 ldr r8, [r2]
yading@10 143 ldr r9, [r2, #4]
yading@10 144 lsr r10, r8, #8
yading@10 145 ldr r4, [r1]
yading@10 146 lsr r6, r9, #8
yading@10 147 orr r10, r10, r9, lsl #24 /* r10 = word 0 shifted down one byte lane */
yading@10 148 ldr r5, [r2, #8]
yading@10 149 eor r11, r8, r10
yading@10 150 uhadd8 r7, r8, r10 /* truncating per-byte average */
yading@10 151 orr r6, r6, r5, lsl #24 /* r6 = word 1 shifted down one byte lane */
yading@10 152 and r11, r11, lr /* rounding bits */
yading@10 153 uadd8 r7, r7, r11 /* rounded average for word 0 */
yading@10 154 ldr r8, [r1, #4]
yading@10 155 usada8 r0, r4, r7, r0 /* accumulate word 0 */
yading@10 156 eor r7, r9, r6
yading@10 157 lsr r10, r5, #8
yading@10 158 and r7, r7, lr
yading@10 159 uhadd8 r4, r9, r6
yading@10 160 ldr r6, [r2, #12]
yading@10 161 uadd8 r4, r4, r7 /* rounded average for word 1 */
yading@10 162 pld [r1, r3]
yading@10 163 orr r10, r10, r6, lsl #24 /* r10 = word 2 shifted down one byte lane */
yading@10 164 usada8 r0, r8, r4, r0 /* accumulate word 1 */
yading@10 165 ldr r4, [r1, #8]
yading@10 166 eor r11, r5, r10
yading@10 167 ldrb r7, [r2, #16] /* 17th byte, neighbour of the last byte of word 3 */
yading@10 168 and r11, r11, lr
yading@10 169 uhadd8 r8, r5, r10
yading@10 170 ldr r5, [r1, #12]
yading@10 171 uadd8 r8, r8, r11 /* rounded average for word 2 */
yading@10 172 pld [r2, r3]
yading@10 173 lsr r10, r6, #8
yading@10 174 usada8 r0, r4, r8, r0 /* accumulate word 2 */
yading@10 175 orr r10, r10, r7, lsl #24 /* r10 = word 3 shifted down one byte lane */
yading@10 176 subs r12, r12, #1 /* no later instruction in the loop sets flags, bgt uses these */
yading@10 177 eor r11, r6, r10
yading@10 178 add r1, r1, r3
yading@10 179 uhadd8 r9, r6, r10
yading@10 180 and r11, r11, lr
yading@10 181 uadd8 r9, r9, r11 /* rounded average for word 3 */
yading@10 182 add r2, r2, r3
yading@10 183 usada8 r0, r5, r9, r0 /* accumulate word 3 */
yading@10 184 bgt 1b
yading@10 185
yading@10 186 pop {r4-r11, pc}
yading@10 187 endfunc
yading@10 188
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Processes one 16-byte row.
 *   p0-p3 : on entry, the four words of the previous r2 row. Clobbered.
 *   n0-n3 : on exit, the four words of the current r2 row, loaded from
 *           offsets 0, 4, 8 and 12 of r2. n1, n2 and n3 are also used as
 *           scratch before they receive their final value.
 *
 * For each word the previous and current r2 words are averaged per byte with
 * rounding up, computed as uhadd8(p, n) + ((p eor n) and lr), and the sum of
 * absolute byte differences against the matching r1 word is added to r0.
 *
 * Expects lr = 0x01010101 and r0 = running sum. Advances r1 and r2 by r3.
 */
yading@10 189 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
yading@10 190 ldr \n0, [r2] /* current row, word 0 */
yading@10 191 eor \n1, \p0, \n0
yading@10 192 uhadd8 \p0, \p0, \n0
yading@10 193 and \n1, \n1, lr /* rounding bits for word 0 */
yading@10 194 ldr \n2, [r1] /* n2 used as scratch for the r1 word */
yading@10 195 uadd8 \p0, \p0, \n1 /* rounded average, word 0 */
yading@10 196 ldr \n1, [r2, #4] /* current row, word 1 */
yading@10 197 usada8 r0, \p0, \n2, r0
yading@10 198 pld [r1, r3]
yading@10 199 eor \n3, \p1, \n1
yading@10 200 uhadd8 \p1, \p1, \n1
yading@10 201 and \n3, \n3, lr /* rounding bits for word 1 */
yading@10 202 ldr \p0, [r1, #4] /* p0 is free now, reused for the r1 word */
yading@10 203 uadd8 \p1, \p1, \n3 /* rounded average, word 1 */
yading@10 204 ldr \n2, [r2, #8] /* current row, word 2 */
yading@10 205 usada8 r0, \p1, \p0, r0
yading@10 206 pld [r2, r3]
yading@10 207 eor \p0, \p2, \n2
yading@10 208 uhadd8 \p2, \p2, \n2
yading@10 209 and \p0, \p0, lr /* rounding bits for word 2 */
yading@10 210 ldr \p1, [r1, #8] /* p1 is free now, reused for the r1 word */
yading@10 211 uadd8 \p2, \p2, \p0 /* rounded average, word 2 */
yading@10 212 ldr \n3, [r2, #12] /* current row, word 3 */
yading@10 213 usada8 r0, \p2, \p1, r0
yading@10 214 eor \p1, \p3, \n3
yading@10 215 uhadd8 \p3, \p3, \n3
yading@10 216 and \p1, \p1, lr /* rounding bits for word 3 */
yading@10 217 ldr \p0, [r1, #12]
yading@10 218 uadd8 \p3, \p3, \p1 /* rounded average, word 3 */
yading@10 219 add r1, r1, r3 /* next row of r1 */
yading@10 220 usada8 r0, \p3, \p0, r0
yading@10 221 add r2, r2, r3 /* next row of r2 */
yading@10 222 .endm
yading@10 223
/*
 * ff_pix_abs16_y2_armv6
 *
 * Sum of absolute byte differences, 16 bytes per row, between the r1 buffer
 * and a vertically averaged version of the r2 buffer: each r2 row is averaged
 * per byte, rounding up, with the r2 row that follows it (see usad_y2).
 *
 * Inputs as used by the code:
 *   r0      = overwritten without being read
 *   r1      = pointer to the buffer compared against
 *   r2      = pointer to the buffer that is averaged. One row more than the
 *             row count is read from it
 *   r3      = value added to r1 and r2 per row (row stride in bytes)
 *   [sp]    = row count, loaded from the top of the stack on entry
 * Returns the accumulated sum in r0.
 *
 * Each loop pass handles two rows and subtracts 2 from the counter, so the
 * row count is expected to be even. An odd count would process one extra row.
 */
yading@10 224 function ff_pix_abs16_y2_armv6, export=1
yading@10 225 pld [r1]
yading@10 226 pld [r2]
yading@10 227 ldr r12, [sp] /* r12 = row count, read before push moves sp */
yading@10 228 push {r4-r11, lr}
yading@10 229 mov r0, #0 /* r0 = running sum */
yading@10 230 mov lr, #1
yading@10 231 orr lr, lr, lr, lsl #8
yading@10 232 orr lr, lr, lr, lsl #16 /* lr = 0x01010101, rounding mask used by usad_y2 */
yading@10 233 ldr r4, [r2] /* prime r4-r7 with the first r2 row */
yading@10 234 ldr r5, [r2, #4]
yading@10 235 ldr r6, [r2, #8]
yading@10 236 ldr r7, [r2, #12]
yading@10 237 add r2, r2, r3
yading@10 238 1:
yading@10 239 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11 /* previous row in r4-r7, leaves current row in r8-r11 */
yading@10 240 subs r12, r12, #2 /* the macro contains no flag-setting instruction, bgt uses these flags */
yading@10 241 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7 /* register roles swapped for the second row */
yading@10 242 bgt 1b
yading@10 243
yading@10 244 pop {r4-r11, pc}
yading@10 245 endfunc
yading@10 246
/*
 * ff_pix_abs8_armv6
 *
 * Sum of absolute byte differences between two buffers, 8 bytes per row.
 *
 * Inputs as used by the code:
 *   r0      = overwritten without being read
 *   r1, r2  = pointers to the two byte buffers
 *   r3      = offset given to pld, ldr_post and ldrd_post (presumably the row
 *             stride in bytes. ldr_post and ldrd_post are presumably macros
 *             from the included asm.S - TODO confirm)
 *   [sp]    = row count, loaded from the top of the stack on entry
 * Returns in r0 the sum of the two partial accumulators r0 and lr.
 *
 * Two rows are handled per loop pass and the counter drops by 2 each time.
 * The loop exits only when it reaches exactly zero (beq), so the row count
 * is expected to be a positive even number.
 */
yading@10 247 function ff_pix_abs8_armv6, export=1
yading@10 248 pld [r2, r3]
yading@10 249 ldr r12, [sp] /* r12 = row count, read before push moves sp */
yading@10 250 push {r4-r9, lr}
yading@10 251 mov r0, #0 /* accumulator for the first word of each row */
yading@10 252 mov lr, #0 /* accumulator for the second word of each row */
yading@10 253 ldrd_post r4, r5, r1, r3 /* first row of r1 */
yading@10 254 1:
yading@10 255 subs r12, r12, #2 /* flags are consumed by beq further down */
yading@10 256 ldr r7, [r2, #4] /* second word first, before r2 is stepped */
yading@10 257 ldr_post r6, r2, r3 /* first word (presumably then r2 += r3) */
yading@10 258 ldrd_post r8, r9, r1, r3 /* next row of r1 */
yading@10 259 usada8 r0, r4, r6, r0
yading@10 260 pld [r2, r3]
yading@10 261 usada8 lr, r5, r7, lr
yading@10 262 ldr r7, [r2, #4] /* r2 words for the row held in r8, r9 */
yading@10 263 ldr_post r6, r2, r3
yading@10 264 beq 2f /* counter hit zero, finish the pending row at 2 */
yading@10 265 ldrd_post r4, r5, r1, r3 /* r1 row for the next pass */
yading@10 266 usada8 r0, r8, r6, r0
yading@10 267 pld [r2, r3]
yading@10 268 usada8 lr, r9, r7, lr
yading@10 269 b 1b
yading@10 270 2:
yading@10 271 usada8 r0, r8, r6, r0 /* last row */
yading@10 272 usada8 lr, r9, r7, lr
yading@10 273 add r0, r0, lr /* combine the two partial sums as the return value */
yading@10 274 pop {r4-r9, pc}
yading@10 275 endfunc
yading@10 276
/*
 * ff_sse16_armv6
 *
 * Sum of squared byte differences between two buffers, 16 bytes per row.
 *
 * Inputs as used by the code:
 *   r0      = overwritten without being read
 *   r1, r2  = pointers to the two byte buffers
 *   r3      = value added to r1 and r2 after each row (row stride in bytes)
 *   [sp]    = row count, loaded from the top of the stack on entry
 * Returns the accumulated sum in r0.
 *
 * Bytes are widened to 16-bit lanes with uxtb16, subtracted with usub16, and
 * each smlad adds the squares of both 16-bit lanes of its operand to r0.
 */
yading@10 277 function ff_sse16_armv6, export=1
yading@10 278 ldr r12, [sp] /* r12 = row count, read before push moves sp */
yading@10 279 push {r4-r9, lr}
yading@10 280 mov r0, #0 /* r0 = running sum */
yading@10 281 1:
yading@10 282 ldrd r4, r5, [r1] /* r1 row, bytes 0-7 */
yading@10 283 ldr r8, [r2] /* r2 row, bytes 0-3 */
yading@10 284 uxtb16 lr, r4
yading@10 285 uxtb16 r4, r4, ror #8
yading@10 286 uxtb16 r9, r8
yading@10 287 uxtb16 r8, r8, ror #8
yading@10 288 ldr r7, [r2, #4] /* r2 row, bytes 4-7 */
yading@10 289 usub16 lr, lr, r9 /* differences, byte lanes 0 and 2 */
yading@10 290 usub16 r4, r4, r8 /* differences, byte lanes 1 and 3 */
yading@10 291 smlad r0, lr, lr, r0 /* r0 += squares of both lanes */
yading@10 292 uxtb16 r6, r5
yading@10 293 uxtb16 lr, r5, ror #8
yading@10 294 uxtb16 r8, r7
yading@10 295 uxtb16 r9, r7, ror #8
yading@10 296 smlad r0, r4, r4, r0
yading@10 297 ldrd r4, r5, [r1, #8] /* r1 row, bytes 8-15 */
yading@10 298 usub16 r6, r6, r8
yading@10 299 usub16 r8, lr, r9
yading@10 300 ldr r7, [r2, #8] /* r2 row, bytes 8-11 */
yading@10 301 smlad r0, r6, r6, r0
yading@10 302 uxtb16 lr, r4
yading@10 303 uxtb16 r4, r4, ror #8
yading@10 304 uxtb16 r9, r7
yading@10 305 uxtb16 r7, r7, ror #8
yading@10 306 smlad r0, r8, r8, r0
yading@10 307 ldr r8, [r2, #12] /* r2 row, bytes 12-15 */
yading@10 308 usub16 lr, lr, r9
yading@10 309 usub16 r4, r4, r7
yading@10 310 smlad r0, lr, lr, r0
yading@10 311 uxtb16 r6, r5
yading@10 312 uxtb16 r5, r5, ror #8
yading@10 313 uxtb16 r9, r8
yading@10 314 uxtb16 r8, r8, ror #8
yading@10 315 smlad r0, r4, r4, r0
yading@10 316 usub16 r6, r6, r9
yading@10 317 usub16 r5, r5, r8
yading@10 318 smlad r0, r6, r6, r0
yading@10 319 add r1, r1, r3 /* step both pointers to the next row */
yading@10 320 add r2, r2, r3
yading@10 321 subs r12, r12, #1 /* flags are consumed by bgt below */
yading@10 322 smlad r0, r5, r5, r0
yading@10 323 bgt 1b
yading@10 324
yading@10 325 pop {r4-r9, pc}
yading@10 326 endfunc
yading@10 327
/*
 * ff_pix_norm1_armv6
 *
 * Sum of the squares of all bytes in 16 rows of 16 bytes.
 *
 * Inputs as used by the code:
 *   r0 = pointer to the first row
 *   r1 = value added to r0 after each row (row stride in bytes)
 * Returns the sum in r0.
 *
 * The row count is fixed at 16 (r12). Each uxtb16 widens two bytes of a word
 * to 16-bit lanes and each smlad adds the squares of both lanes to lr.
 */
yading@10 328 function ff_pix_norm1_armv6, export=1
yading@10 329 push {r4-r6, lr}
yading@10 330 mov r12, #16 /* r12 = row counter */
yading@10 331 mov lr, #0 /* lr = running sum */
yading@10 332 1:
yading@10 333 ldm r0, {r2-r5} /* one 16-byte row, r0 is not written back here */
yading@10 334 uxtb16 r6, r2 /* byte lanes 0 and 2 */
yading@10 335 uxtb16 r2, r2, ror #8 /* byte lanes 1 and 3 */
yading@10 336 smlad lr, r6, r6, lr
yading@10 337 uxtb16 r6, r3
yading@10 338 smlad lr, r2, r2, lr
yading@10 339 uxtb16 r3, r3, ror #8
yading@10 340 smlad lr, r6, r6, lr
yading@10 341 uxtb16 r6, r4
yading@10 342 smlad lr, r3, r3, lr
yading@10 343 uxtb16 r4, r4, ror #8
yading@10 344 smlad lr, r6, r6, lr
yading@10 345 uxtb16 r6, r5
yading@10 346 smlad lr, r4, r4, lr
yading@10 347 uxtb16 r5, r5, ror #8
yading@10 348 smlad lr, r6, r6, lr
yading@10 349 subs r12, r12, #1 /* flags are consumed by bgt below */
yading@10 350 add r0, r0, r1 /* next row */
yading@10 351 smlad lr, r5, r5, lr
yading@10 352 bgt 1b
yading@10 353
yading@10 354 mov r0, lr /* return value */
yading@10 355 pop {r4-r6, pc}
yading@10 356 endfunc
yading@10 357
/*
 * ff_pix_sum_armv6
 *
 * Sum of all bytes in 16 rows of 16 bytes.
 *
 * Inputs as used by the code:
 *   r0 = pointer to the first row
 *   r1 = offset given to ldr_pre (presumably the row stride in bytes. ldr_pre
 *        is presumably a macro from the included asm.S that steps r0 by r1
 *        before loading - TODO confirm)
 * Returns in r0 the sum of the two partial accumulators r2 and r3.
 *
 * The row count is fixed at 16 (r12). usada8 against lr = 0 adds the four
 * bytes of a word to the accumulator. The load of the next row sits after the
 * exit test, so no row beyond the 16th is read.
 */
yading@10 358 function ff_pix_sum_armv6, export=1
yading@10 359 push {r4-r7, lr}
yading@10 360 mov r12, #16 /* r12 = row counter */
yading@10 361 mov r2, #0 /* accumulator for words 0 and 2 */
yading@10 362 mov r3, #0 /* accumulator for words 1 and 3 */
yading@10 363 mov lr, #0 /* constant zero operand for usada8 */
yading@10 364 ldr r4, [r0] /* first word of the first row */
yading@10 365 1:
yading@10 366 subs r12, r12, #1 /* flags feed both beq and bgt below */
yading@10 367 ldr r5, [r0, #4]
yading@10 368 usada8 r2, r4, lr, r2 /* r2 += the four bytes of r4 */
yading@10 369 ldr r6, [r0, #8]
yading@10 370 usada8 r3, r5, lr, r3
yading@10 371 ldr r7, [r0, #12]
yading@10 372 usada8 r2, r6, lr, r2
yading@10 373 beq 2f /* last row: skip the load of a following row */
yading@10 374 ldr_pre r4, r0, r1 /* first word of the next row */
yading@10 375 usada8 r3, r7, lr, r3
yading@10 376 bgt 1b
yading@10 377 2:
yading@10 378 usada8 r3, r7, lr, r3 /* word 3 of the last row */
yading@10 379 add r0, r2, r3 /* combine the two partial sums as the return value */
yading@10 380 pop {r4-r7, pc}
yading@10 381 endfunc