annotate ffmpeg/libavcodec/arm/h264qpel_neon.S @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
yading@10 3 *
yading@10 4 * This file is part of FFmpeg.
yading@10 5 *
yading@10 6 * FFmpeg is free software; you can redistribute it and/or
yading@10 7 * modify it under the terms of the GNU Lesser General Public
yading@10 8 * License as published by the Free Software Foundation; either
yading@10 9 * version 2.1 of the License, or (at your option) any later version.
yading@10 10 *
yading@10 11 * FFmpeg is distributed in the hope that it will be useful,
yading@10 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 14 * Lesser General Public License for more details.
yading@10 15 *
yading@10 16 * You should have received a copy of the GNU Lesser General Public
yading@10 17 * License along with FFmpeg; if not, write to the Free Software
yading@10 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 19 */
yading@10 20
yading@10 21 #include "libavutil/arm/asm.S"
yading@10 22 #include "neon.S"
yading@10 23
yading@10 24 /* H.264 qpel MC */
yading@10 25
/*
 * lowpass_const r
 *
 * Load the two filter multipliers into d6.  The core register given as r
 * (clobbered) is set to 0x00140005 and copied into the low 32 bits of d6,
 * so that the 16-bit lanes read d6[0] = 5 and d6[1] = 20, which is how
 * lowpass_8 and lowpass_8_1 reference them.
 */
yading@10 26 .macro lowpass_const r
yading@10 27 movw \r, #5
yading@10 28 movt \r, #20
yading@10 29 vmov.32 d6[0], \r
yading@10 30 .endm
yading@10 31
/*
 * lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
 *
 * Apply the 6-tap filter (1, -5, 20, 20, -5, 1) along two rows at once.
 * Row A is the 16 bytes held in the d-register pair r0:r1, row B the 16
 * bytes in r2:r3.  For each of the 8 output positions the macro forms
 *     x[0] + x[5] + 20 * (x[2] + x[3]) - 5 * (x[1] + x[4])
 * in 16-bit lanes, x[] being the bytes that start at that position.
 *
 * narrow=1: the sums are built in q0 (row A) and q8 (row B), then rounded,
 *           shifted right by 5 and saturated to unsigned bytes into the
 *           d registers passed as d0 / d1.
 * narrow=0: the arguments d0 / d1 must be q registers; they receive the
 *           unshifted 16-bit sums and no narrowing is done.
 *
 * Expects the multipliers in d6 (d6[0] = 5, d6[1] = 20; see lowpass_const).
 * Scratch: q1, q2, q9, q10, d30, d31, plus q0 and q8 when narrow=1.
 * The two rows are interleaved instruction by instruction; the order
 * matters because the row B setup reuses q9/q10 halves as vext targets.
 */
yading@10 32 .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
yading@10 33 .if \narrow
yading@10 34 t0 .req q0
yading@10 35 t1 .req q8
yading@10 36 .else
yading@10 37 t0 .req \d0
yading@10 38 t1 .req \d1
yading@10 39 .endif
yading@10 40 vext.8 d2, \r0, \r1, #2
yading@10 41 vext.8 d3, \r0, \r1, #3
yading@10 42 vaddl.u8 q1, d2, d3 /* row A: x[2] + x[3] */
yading@10 43 vext.8 d4, \r0, \r1, #1
yading@10 44 vext.8 d5, \r0, \r1, #4
yading@10 45 vaddl.u8 q2, d4, d5 /* row A: x[1] + x[4] */
yading@10 46 vext.8 d30, \r0, \r1, #5
yading@10 47 vaddl.u8 t0, \r0, d30 /* row A: x[0] + x[5] */
yading@10 48 vext.8 d18, \r2, \r3, #2
yading@10 49 vmla.i16 t0, q1, d6[1] /* row A: += 20 * (x[2] + x[3]) */
yading@10 50 vext.8 d19, \r2, \r3, #3
yading@10 51 vaddl.u8 q9, d18, d19 /* row B: x[2] + x[3] */
yading@10 52 vext.8 d20, \r2, \r3, #1
yading@10 53 vmls.i16 t0, q2, d6[0] /* row A: -= 5 * (x[1] + x[4]) */
yading@10 54 vext.8 d21, \r2, \r3, #4
yading@10 55 vaddl.u8 q10, d20, d21 /* row B: x[1] + x[4] */
yading@10 56 vext.8 d31, \r2, \r3, #5
yading@10 57 vaddl.u8 t1, \r2, d31 /* row B: x[0] + x[5] */
yading@10 58 vmla.i16 t1, q9, d6[1]
yading@10 59 vmls.i16 t1, q10, d6[0]
yading@10 60 .if \narrow
yading@10 61 vqrshrun.s16 \d0, t0, #5 /* round, >> 5, saturate to u8 */
yading@10 62 vqrshrun.s16 \d1, t1, #5
yading@10 63 .endif
yading@10 64 .unreq t0
yading@10 65 .unreq t1
yading@10 66 .endm
yading@10 67
/*
 * lowpass_8_1 r0, r1, d0, narrow=1
 *
 * Single-row variant of lowpass_8: filters the 16 bytes in r0:r1 with the
 * taps (1, -5, 20, 20, -5, 1) and produces 8 results.
 *
 * narrow=1: sum built in q0, then rounded, shifted right by 5 and
 *           saturated to unsigned bytes into the d register passed as d0.
 * narrow=0: the argument d0 must be a q register; it receives the
 *           unshifted 16-bit sums.
 *
 * Expects d6[0] = 5 and d6[1] = 20 (see lowpass_const).
 * Scratch: q1, q2, d30, plus q0 when narrow=1.
 */
yading@10 68 .macro lowpass_8_1 r0, r1, d0, narrow=1
yading@10 69 .if \narrow
yading@10 70 t0 .req q0
yading@10 71 .else
yading@10 72 t0 .req \d0
yading@10 73 .endif
yading@10 74 vext.8 d2, \r0, \r1, #2
yading@10 75 vext.8 d3, \r0, \r1, #3
yading@10 76 vaddl.u8 q1, d2, d3 /* x[2] + x[3] */
yading@10 77 vext.8 d4, \r0, \r1, #1
yading@10 78 vext.8 d5, \r0, \r1, #4
yading@10 79 vaddl.u8 q2, d4, d5 /* x[1] + x[4] */
yading@10 80 vext.8 d30, \r0, \r1, #5
yading@10 81 vaddl.u8 t0, \r0, d30 /* x[0] + x[5] */
yading@10 82 vmla.i16 t0, q1, d6[1] /* += 20 * (x[2] + x[3]) */
yading@10 83 vmls.i16 t0, q2, d6[0] /* -= 5 * (x[1] + x[4]) */
yading@10 84 .if \narrow
yading@10 85 vqrshrun.s16 \d0, t0, #5
yading@10 86 .endif
yading@10 87 .unreq t0
yading@10 88 .endm
yading@10 89
/*
 * lowpass_8.16 r0, r1, l0, h0, l1, h1, d
 *
 * Second filter pass on 16-bit intermediates.  r0:r1 are two q registers
 * holding 16 signed 16-bit values x[0..15]; the macro computes, with
 * 32-bit intermediates, for the 8 output positions
 *     x[0] + x[5] + 20 * (x[2] + x[3]) - 5 * (x[1] + x[4])
 * then applies a rounding shift right by 10 and saturates to unsigned
 * bytes in the d register passed as d.
 *
 * l0/h0 and l1/h1 are d registers added pairwise for the x[0] + x[5]
 * term.  Every use in this file passes the low/high halves of r0 and of
 * r1; note that r1 is overwritten with x[5..12] before l1/h1 are read.
 *
 * The multiplications are done with shifts and adds, so d6 is not needed
 * as a constant here.  On the contrary: q3 (d6/d7) is used as scratch, so
 * the values set by lowpass_const do not survive this macro.
 * Scratch: q0, q1, q2, q3, q8, q9, q10, q15 and the register passed as r1.
 */
yading@10 90 .macro lowpass_8.16 r0, r1, l0, h0, l1, h1, d
yading@10 91 vext.16 q1, \r0, \r1, #2
yading@10 92 vext.16 q0, \r0, \r1, #3
yading@10 93 vaddl.s16 q9, d2, d0 /* low 4: x[2] + x[3] */
yading@10 94 vext.16 q2, \r0, \r1, #1
yading@10 95 vaddl.s16 q1, d3, d1 /* high 4: x[2] + x[3] */
yading@10 96 vext.16 q3, \r0, \r1, #4
yading@10 97 vaddl.s16 q10, d4, d6 /* low 4: x[1] + x[4] */
yading@10 98 vext.16 \r1, \r0, \r1, #5
yading@10 99 vaddl.s16 q2, d5, d7 /* high 4: x[1] + x[4] */
yading@10 100 vaddl.s16 q0, \h0, \h1
yading@10 101 vaddl.s16 q8, \l0, \l1
yading@10 102
/* low half: q9 = 16a + 4a = 20a, q10 = b + 4b = 5b */
yading@10 103 vshl.i32 q3, q9, #4
yading@10 104 vshl.i32 q9, q9, #2
yading@10 105 vshl.i32 q15, q10, #2
yading@10 106 vadd.i32 q9, q9, q3
yading@10 107 vadd.i32 q10, q10, q15
yading@10 108
/* high half: same scaling for q1 (x20) and q2 (x5) */
yading@10 109 vshl.i32 q3, q1, #4
yading@10 110 vshl.i32 q1, q1, #2
yading@10 111 vshl.i32 q15, q2, #2
yading@10 112 vadd.i32 q1, q1, q3
yading@10 113 vadd.i32 q2, q2, q15
yading@10 114
yading@10 115 vadd.i32 q9, q9, q8
yading@10 116 vsub.i32 q9, q9, q10
yading@10 117
yading@10 118 vadd.i32 q1, q1, q0
yading@10 119 vsub.i32 q1, q1, q2
yading@10 120
yading@10 121 vrshrn.s32 d18, q9, #10 /* rounding >> 10, narrow to 16 bit */
yading@10 122 vrshrn.s32 d19, q1, #10
yading@10 123
yading@10 124 vqmovun.s16 \d, q9 /* saturate to unsigned 8 bit */
yading@10 125 .endm
yading@10 126
/*
 * put_h264_qpel16_h_lowpass_neon_packed
 *
 * Horizontal lowpass of a 16x16 block, done as two 8-column passes of
 * put_h264_qpel8_h_lowpass_neon with 16 rows each.
 *
 * In:  r0 = dst, r1 = src, r2 = src stride.
 * The dst stride is forced to 8 (r3) and r0 is not rewound between the
 * two passes, so the right half is written directly after the 16 rows of
 * the left half ("packed" layout, 2 * 16 * 8 = 256 bytes).
 * lr is kept in r4 across the first call; the second call is a tail call.
 * Clobbers r3, r4, r12 in addition to what the callee uses.
 */
yading@10 127 function put_h264_qpel16_h_lowpass_neon_packed
yading@10 128 mov r4, lr
yading@10 129 mov r12, #16
yading@10 130 mov r3, #8
yading@10 131 bl put_h264_qpel8_h_lowpass_neon
yading@10 132 sub r1, r1, r2, lsl #4 /* src back up 16 rows ... */
yading@10 133 add r1, r1, #8 /* ... and 8 columns to the right */
yading@10 134 mov r12, #16
yading@10 135 mov lr, r4
yading@10 136 b put_h264_qpel8_h_lowpass_neon
yading@10 137 endfunc
yading@10 138
/*
 * h264_qpel_h_lowpass type
 *
 * Emits <type>_h264_qpel16_h_lowpass_neon and
 * <type>_h264_qpel8_h_lowpass_neon, type being put or avg.
 *
 * Register interface of the qpel8 function:
 *   r0  = dst (stores use a :64 alignment hint), r3 = dst stride
 *   r1  = src (16 bytes are read per row), r2 = src stride
 *   r12 = number of rows; two rows are handled per iteration, so it must
 *         be a positive even number
 *   d6  = filter multipliers from lowpass_const
 * For avg, the filtered rows are combined with the bytes already at dst
 * using a rounding average before being stored.
 */
yading@10 139 .macro h264_qpel_h_lowpass type
/*
 * 16-wide version: left 8 columns via a call, then rewind both pointers
 * by 16 rows, step 8 columns right and continue for the right half.
 * There is no return or branch after the pop: execution runs on into the
 * qpel8 function emitted directly below (this presumes endfunc emits no
 * instructions - it is defined outside this file).
 */
yading@10 140 function \type\()_h264_qpel16_h_lowpass_neon
yading@10 141 push {lr}
yading@10 142 mov r12, #16
yading@10 143 bl \type\()_h264_qpel8_h_lowpass_neon
yading@10 144 sub r0, r0, r3, lsl #4
yading@10 145 sub r1, r1, r2, lsl #4
yading@10 146 add r0, r0, #8
yading@10 147 add r1, r1, #8
yading@10 148 mov r12, #16
yading@10 149 pop {lr}
yading@10 150 endfunc
yading@10 151
yading@10 152 function \type\()_h264_qpel8_h_lowpass_neon
yading@10 153 1: vld1.8 {d0, d1}, [r1], r2
yading@10 154 vld1.8 {d16,d17}, [r1], r2
yading@10 155 subs r12, r12, #2
yading@10 156 lowpass_8 d0, d1, d16, d17, d0, d16
yading@10 157 .ifc \type,avg
yading@10 158 vld1.8 {d2}, [r0,:64], r3
yading@10 159 vrhadd.u8 d0, d0, d2
yading@10 160 vld1.8 {d3}, [r0,:64]
yading@10 161 vrhadd.u8 d16, d16, d3
yading@10 162 sub r0, r0, r3 /* back to the first of the two dst rows */
yading@10 163 .endif
yading@10 164 vst1.8 {d0}, [r0,:64], r3
yading@10 165 vst1.8 {d16}, [r0,:64], r3
yading@10 166 bne 1b /* flags still come from the subs above */
yading@10 167 bx lr
yading@10 168 endfunc
yading@10 169 .endm
yading@10 170
yading@10 171 h264_qpel_h_lowpass put
yading@10 172 h264_qpel_h_lowpass avg
yading@10 173
/*
 * h264_qpel_h_lowpass_l2 type
 *
 * Emits <type>_h264_qpel16_h_lowpass_l2_neon and
 * <type>_h264_qpel8_h_lowpass_l2_neon, type being put or avg.
 * Like the plain horizontal lowpass, but each filtered row is additionally
 * rounding-averaged with a row from a second source.
 *
 * Register interface of the qpel8 function:
 *   r0  = dst (stores use a :64 alignment hint)
 *   r1  = src for the filter (16 bytes read per row)
 *   r3  = second source (8 bytes read per row)
 *   r2  = stride, shared by dst, src and the second source
 *   r12 = number of rows (positive, even)
 *   d6  = filter multipliers from lowpass_const
 */
yading@10 174 .macro h264_qpel_h_lowpass_l2 type
/*
 * 16-wide version: after the left half, all three pointers are rewound by
 * 16 rows and moved 8 columns right.  As there is no return after the
 * pop, execution continues into the qpel8 function emitted below (this
 * presumes endfunc emits no instructions - it is defined outside this
 * file).
 */
yading@10 175 function \type\()_h264_qpel16_h_lowpass_l2_neon
yading@10 176 push {lr}
yading@10 177 mov r12, #16
yading@10 178 bl \type\()_h264_qpel8_h_lowpass_l2_neon
yading@10 179 sub r0, r0, r2, lsl #4
yading@10 180 sub r1, r1, r2, lsl #4
yading@10 181 sub r3, r3, r2, lsl #4
yading@10 182 add r0, r0, #8
yading@10 183 add r1, r1, #8
yading@10 184 add r3, r3, #8
yading@10 185 mov r12, #16
yading@10 186 pop {lr}
yading@10 187 endfunc
yading@10 188
yading@10 189 function \type\()_h264_qpel8_h_lowpass_l2_neon
yading@10 190 1: vld1.8 {d0, d1}, [r1], r2
yading@10 191 vld1.8 {d16,d17}, [r1], r2
yading@10 192 vld1.8 {d28}, [r3], r2
yading@10 193 vld1.8 {d29}, [r3], r2
yading@10 194 subs r12, r12, #2
yading@10 195 lowpass_8 d0, d1, d16, d17, d0, d1
yading@10 196 vrhadd.u8 q0, q0, q14 /* average both rows with the second source */
yading@10 197 .ifc \type,avg
yading@10 198 vld1.8 {d2}, [r0,:64], r2
yading@10 199 vrhadd.u8 d0, d0, d2
yading@10 200 vld1.8 {d3}, [r0,:64]
yading@10 201 vrhadd.u8 d1, d1, d3
yading@10 202 sub r0, r0, r2 /* back to the first of the two dst rows */
yading@10 203 .endif
yading@10 204 vst1.8 {d0}, [r0,:64], r2
yading@10 205 vst1.8 {d1}, [r0,:64], r2
yading@10 206 bne 1b
yading@10 207 bx lr
yading@10 208 endfunc
yading@10 209 .endm
yading@10 210
yading@10 211 h264_qpel_h_lowpass_l2 put
yading@10 212 h264_qpel_h_lowpass_l2 avg
yading@10 213
/*
 * put_h264_qpel16_v_lowpass_neon_packed
 *
 * Vertical lowpass of a 16x16 block as four 8x8 calls to
 * put_h264_qpel8_v_lowpass_neon.
 *
 * In:  r0 = dst, r1 = src (first of the 13 rows the 8x8 routine reads),
 *      r3 = src stride.
 * The dst stride is forced to 8 (r2) and r0 is never rewound, so the four
 * 8x8 results are written back to back (64 bytes each) in the order
 * top-left, bottom-left, top-right, bottom-right.
 * Each 8x8 call leaves r1 advanced by 12 rows, which is what the pointer
 * corrections below account for.
 * lr is kept in r4; the last call is a tail call.
 */
yading@10 214 function put_h264_qpel16_v_lowpass_neon_packed
yading@10 215 mov r4, lr
yading@10 216 mov r2, #8
yading@10 217 bl put_h264_qpel8_v_lowpass_neon
yading@10 218 sub r1, r1, r3, lsl #2 /* +12 - 4 = 8 rows down: bottom-left */
yading@10 219 bl put_h264_qpel8_v_lowpass_neon
yading@10 220 sub r1, r1, r3, lsl #4
yading@10 221 sub r1, r1, r3, lsl #2 /* 20 rows back up: top row again */
yading@10 222 add r1, r1, #8 /* right half */
yading@10 223 bl put_h264_qpel8_v_lowpass_neon
yading@10 224 sub r1, r1, r3, lsl #2
yading@10 225 mov lr, r4
yading@10 226 b put_h264_qpel8_v_lowpass_neon
yading@10 227 endfunc
yading@10 228
/*
 * h264_qpel_v_lowpass type
 *
 * Emits <type>_h264_qpel16_v_lowpass_neon and
 * <type>_h264_qpel8_v_lowpass_neon, type being put or avg.
 *
 * Register interface of the qpel8 function:
 *   r0 = dst (:64 alignment hint), r2 = dst stride
 *   r1 = src, pointing at the first of the 13 rows that are read,
 *        r3 = src stride
 *   d6 = filter multipliers from lowpass_const
 * On return r0 has advanced by 8 rows and r1 by 12 rows.
 * d8-d15 are used, so callers have to preserve them (the exported entry
 * points in this file wrap the call in vpush/vpop {d8-d15}).
 */
yading@10 229 .macro h264_qpel_v_lowpass type
/*
 * 16x16 version: top-left, bottom-left, top-right via calls (lr kept in
 * r4), then execution continues into the qpel8 function emitted below for
 * the bottom-right block, whose bx lr returns to the original caller
 * (this presumes endfunc emits no instructions - it is defined outside
 * this file).
 */
yading@10 230 function \type\()_h264_qpel16_v_lowpass_neon
yading@10 231 mov r4, lr
yading@10 232 bl \type\()_h264_qpel8_v_lowpass_neon
yading@10 233 sub r1, r1, r3, lsl #2 /* net 8 rows down */
yading@10 234 bl \type\()_h264_qpel8_v_lowpass_neon
yading@10 235 sub r0, r0, r2, lsl #4
yading@10 236 add r0, r0, #8
yading@10 237 sub r1, r1, r3, lsl #4
yading@10 238 sub r1, r1, r3, lsl #2
yading@10 239 add r1, r1, #8
yading@10 240 bl \type\()_h264_qpel8_v_lowpass_neon
yading@10 241 sub r1, r1, r3, lsl #2
yading@10 242 mov lr, r4
yading@10 243 endfunc
yading@10 244
yading@10 245 function \type\()_h264_qpel8_v_lowpass_neon
/* rows 0-7 go to the low d half of q4-q7, q11-q14; rows 8-12 to the high halves */
yading@10 246 vld1.8 {d8}, [r1], r3
yading@10 247 vld1.8 {d10}, [r1], r3
yading@10 248 vld1.8 {d12}, [r1], r3
yading@10 249 vld1.8 {d14}, [r1], r3
yading@10 250 vld1.8 {d22}, [r1], r3
yading@10 251 vld1.8 {d24}, [r1], r3
yading@10 252 vld1.8 {d26}, [r1], r3
yading@10 253 vld1.8 {d28}, [r1], r3
yading@10 254 vld1.8 {d9}, [r1], r3
yading@10 255 vld1.8 {d11}, [r1], r3
yading@10 256 vld1.8 {d13}, [r1], r3
yading@10 257 vld1.8 {d15}, [r1], r3
yading@10 258 vld1.8 {d23}, [r1]
yading@10 259
/*
 * transpose_8x8 comes from neon.S (not visible here); presumably it turns
 * the columns into rows so that the horizontal macro lowpass_8 can do the
 * vertical filtering, and the second transpose restores row order.
 */
yading@10 260 transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
yading@10 261 lowpass_8 d8, d9, d10, d11, d8, d10
yading@10 262 lowpass_8 d12, d13, d14, d15, d12, d14
yading@10 263 lowpass_8 d22, d23, d24, d25, d22, d24
yading@10 264 lowpass_8 d26, d27, d28, d29, d26, d28
yading@10 265 transpose_8x8 d8, d10, d12, d14, d22, d24, d26, d28
yading@10 266
yading@10 267 .ifc \type,avg
/* rounding average with the 8 rows already at dst, then rewind dst */
yading@10 268 vld1.8 {d9}, [r0,:64], r2
yading@10 269 vrhadd.u8 d8, d8, d9
yading@10 270 vld1.8 {d11}, [r0,:64], r2
yading@10 271 vrhadd.u8 d10, d10, d11
yading@10 272 vld1.8 {d13}, [r0,:64], r2
yading@10 273 vrhadd.u8 d12, d12, d13
yading@10 274 vld1.8 {d15}, [r0,:64], r2
yading@10 275 vrhadd.u8 d14, d14, d15
yading@10 276 vld1.8 {d23}, [r0,:64], r2
yading@10 277 vrhadd.u8 d22, d22, d23
yading@10 278 vld1.8 {d25}, [r0,:64], r2
yading@10 279 vrhadd.u8 d24, d24, d25
yading@10 280 vld1.8 {d27}, [r0,:64], r2
yading@10 281 vrhadd.u8 d26, d26, d27
yading@10 282 vld1.8 {d29}, [r0,:64], r2
yading@10 283 vrhadd.u8 d28, d28, d29
yading@10 284 sub r0, r0, r2, lsl #3
yading@10 285 .endif
yading@10 286
yading@10 287 vst1.8 {d8}, [r0,:64], r2
yading@10 288 vst1.8 {d10}, [r0,:64], r2
yading@10 289 vst1.8 {d12}, [r0,:64], r2
yading@10 290 vst1.8 {d14}, [r0,:64], r2
yading@10 291 vst1.8 {d22}, [r0,:64], r2
yading@10 292 vst1.8 {d24}, [r0,:64], r2
yading@10 293 vst1.8 {d26}, [r0,:64], r2
yading@10 294 vst1.8 {d28}, [r0,:64], r2
yading@10 295
yading@10 296 bx lr
yading@10 297 endfunc
yading@10 298 .endm
yading@10 299
yading@10 300 h264_qpel_v_lowpass put
yading@10 301 h264_qpel_v_lowpass avg
yading@10 302
/*
 * h264_qpel_v_lowpass_l2 type
 *
 * Emits <type>_h264_qpel16_v_lowpass_l2_neon and
 * <type>_h264_qpel8_v_lowpass_l2_neon, type being put or avg.
 * Vertical lowpass whose result is rounding-averaged with a second source
 * before being stored.
 *
 * Register interface of the qpel8 function (note that, compared with the
 * plain vertical lowpass, dst uses r3 as its stride here):
 *   r0  = dst (:64 alignment hint), stride r3
 *   r1  = src, first of the 13 rows read, stride r3
 *   r12 = second source (8 rows of 8 bytes), stride r2
 *   d6  = filter multipliers from lowpass_const
 * On return r0 and r12 have advanced by 8 rows, r1 by 12 rows.
 * d8-d15 are used; callers have to preserve them.
 */
yading@10 303 .macro h264_qpel_v_lowpass_l2 type
/*
 * 16x16 version: three calls with lr kept in r4, then execution continues
 * into the qpel8 function emitted below for the bottom-right block (this
 * presumes endfunc emits no instructions - it is defined outside this
 * file).
 */
yading@10 304 function \type\()_h264_qpel16_v_lowpass_l2_neon
yading@10 305 mov r4, lr
yading@10 306 bl \type\()_h264_qpel8_v_lowpass_l2_neon
yading@10 307 sub r1, r1, r3, lsl #2
yading@10 308 bl \type\()_h264_qpel8_v_lowpass_l2_neon
yading@10 309 sub r0, r0, r3, lsl #4
yading@10 310 sub r12, r12, r2, lsl #4
yading@10 311 add r0, r0, #8
yading@10 312 add r12, r12, #8
yading@10 313 sub r1, r1, r3, lsl #4
yading@10 314 sub r1, r1, r3, lsl #2
yading@10 315 add r1, r1, #8
yading@10 316 bl \type\()_h264_qpel8_v_lowpass_l2_neon
yading@10 317 sub r1, r1, r3, lsl #2
yading@10 318 mov lr, r4
yading@10 319 endfunc
yading@10 320
yading@10 321 function \type\()_h264_qpel8_v_lowpass_l2_neon
yading@10 322 vld1.8 {d8}, [r1], r3
yading@10 323 vld1.8 {d10}, [r1], r3
yading@10 324 vld1.8 {d12}, [r1], r3
yading@10 325 vld1.8 {d14}, [r1], r3
yading@10 326 vld1.8 {d22}, [r1], r3
yading@10 327 vld1.8 {d24}, [r1], r3
yading@10 328 vld1.8 {d26}, [r1], r3
yading@10 329 vld1.8 {d28}, [r1], r3
yading@10 330 vld1.8 {d9}, [r1], r3
yading@10 331 vld1.8 {d11}, [r1], r3
yading@10 332 vld1.8 {d13}, [r1], r3
yading@10 333 vld1.8 {d15}, [r1], r3
yading@10 334 vld1.8 {d23}, [r1]
yading@10 335
/*
 * Same transpose / filter / transpose scheme as the plain vertical
 * lowpass, but the results are placed in d8,d9 / d12,d13 / d22,d23 /
 * d26,d27 so that two output rows share a q register (q4, q6, q11, q13)
 * for the q-wide averaging below.  transpose_8x8 is defined in neon.S.
 */
yading@10 336 transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
yading@10 337 lowpass_8 d8, d9, d10, d11, d8, d9
yading@10 338 lowpass_8 d12, d13, d14, d15, d12, d13
yading@10 339 lowpass_8 d22, d23, d24, d25, d22, d23
yading@10 340 lowpass_8 d26, d27, d28, d29, d26, d27
yading@10 341 transpose_8x8 d8, d9, d12, d13, d22, d23, d26, d27
yading@10 342
/* load the 8 rows of the second source and average, two rows at a time */
yading@10 343 vld1.8 {d0}, [r12], r2
yading@10 344 vld1.8 {d1}, [r12], r2
yading@10 345 vld1.8 {d2}, [r12], r2
yading@10 346 vld1.8 {d3}, [r12], r2
yading@10 347 vld1.8 {d4}, [r12], r2
yading@10 348 vrhadd.u8 q0, q0, q4
yading@10 349 vld1.8 {d5}, [r12], r2
yading@10 350 vrhadd.u8 q1, q1, q6
yading@10 351 vld1.8 {d10}, [r12], r2
yading@10 352 vrhadd.u8 q2, q2, q11
yading@10 353 vld1.8 {d11}, [r12], r2
yading@10 354 vrhadd.u8 q5, q5, q13
yading@10 355
yading@10 356 .ifc \type,avg
/* rounding average with the 8 rows already at dst, then rewind dst */
yading@10 357 vld1.8 {d16}, [r0,:64], r3
yading@10 358 vrhadd.u8 d0, d0, d16
yading@10 359 vld1.8 {d17}, [r0,:64], r3
yading@10 360 vrhadd.u8 d1, d1, d17
yading@10 361 vld1.8 {d16}, [r0,:64], r3
yading@10 362 vrhadd.u8 d2, d2, d16
yading@10 363 vld1.8 {d17}, [r0,:64], r3
yading@10 364 vrhadd.u8 d3, d3, d17
yading@10 365 vld1.8 {d16}, [r0,:64], r3
yading@10 366 vrhadd.u8 d4, d4, d16
yading@10 367 vld1.8 {d17}, [r0,:64], r3
yading@10 368 vrhadd.u8 d5, d5, d17
yading@10 369 vld1.8 {d16}, [r0,:64], r3
yading@10 370 vrhadd.u8 d10, d10, d16
yading@10 371 vld1.8 {d17}, [r0,:64], r3
yading@10 372 vrhadd.u8 d11, d11, d17
yading@10 373 sub r0, r0, r3, lsl #3
yading@10 374 .endif
yading@10 375
yading@10 376 vst1.8 {d0}, [r0,:64], r3
yading@10 377 vst1.8 {d1}, [r0,:64], r3
yading@10 378 vst1.8 {d2}, [r0,:64], r3
yading@10 379 vst1.8 {d3}, [r0,:64], r3
yading@10 380 vst1.8 {d4}, [r0,:64], r3
yading@10 381 vst1.8 {d5}, [r0,:64], r3
yading@10 382 vst1.8 {d10}, [r0,:64], r3
yading@10 383 vst1.8 {d11}, [r0,:64], r3
yading@10 384
yading@10 385 bx lr
yading@10 386 endfunc
yading@10 387 .endm
yading@10 388
yading@10 389 h264_qpel_v_lowpass_l2 put
yading@10 390 h264_qpel_v_lowpass_l2 avg
yading@10 391
/*
 * put_h264_qpel8_hv_lowpass_neon_top
 *
 * Shared core of the 8x8 two-dimensional (horizontal then vertical)
 * lowpass.  It computes the result but does not store it to dst.
 *
 * In:  r1 = src (first of 13 rows, 16 bytes read per row), r3 = src stride
 *      r4 = scratch buffer, accessed with :128 alignment hints; 12 * 16
 *           bytes are used
 * Out: the 8 output rows as bytes in d12, d13, d14, d15, d8, d9, d10, d11
 *      (in that order, which is the order the callers store them)
 *      r1 advanced by 12 rows, r4 back at the start of the scratch buffer,
 *      r12 = -16
 * The filter constants are loaded here (lowpass_const), so callers need
 * not do it; d6 is clobbered again by lowpass_8.16 before returning.
 * Uses q0-q15; callers have to preserve d8-d15.
 */
yading@10 392 function put_h264_qpel8_hv_lowpass_neon_top
yading@10 393 lowpass_const r12
yading@10 394 mov r12, #12
/* pass 1: horizontal filter of rows 0-11, kept as unshifted 16-bit sums */
yading@10 395 1: vld1.8 {d0, d1}, [r1], r3
yading@10 396 vld1.8 {d16,d17}, [r1], r3
yading@10 397 subs r12, r12, #2
yading@10 398 lowpass_8 d0, d1, d16, d17, q11, q12, narrow=0
yading@10 399 vst1.8 {d22-d25}, [r4,:128]!
yading@10 400 bne 1b
yading@10 401
/* row 12 is filtered into q12 and stays in registers */
yading@10 402 vld1.8 {d0, d1}, [r1]
yading@10 403 lowpass_8_1 d0, d1, q12, narrow=0
yading@10 404
/* read rows 11..0 back from the scratch buffer, walking downwards */
yading@10 405 mov r12, #-16
yading@10 406 add r4, r4, r12
yading@10 407 vld1.8 {d30,d31}, [r4,:128], r12
yading@10 408 vld1.8 {d20,d21}, [r4,:128], r12
yading@10 409 vld1.8 {d18,d19}, [r4,:128], r12
yading@10 410 vld1.8 {d16,d17}, [r4,:128], r12
yading@10 411 vld1.8 {d14,d15}, [r4,:128], r12
yading@10 412 vld1.8 {d12,d13}, [r4,:128], r12
yading@10 413 vld1.8 {d10,d11}, [r4,:128], r12
yading@10 414 vld1.8 {d8, d9}, [r4,:128], r12
yading@10 415 vld1.8 {d6, d7}, [r4,:128], r12
yading@10 416 vld1.8 {d4, d5}, [r4,:128], r12
yading@10 417 vld1.8 {d2, d3}, [r4,:128], r12
yading@10 418 vld1.8 {d0, d1}, [r4,:128]
yading@10 419
/*
 * swap4 and transpose16_4x4 come from neon.S (not visible here);
 * presumably the two pairs below transpose the 16-bit data so that each
 * q-register pair handed to lowpass_8.16 holds one column of sums.
 */
yading@10 420 swap4 d1, d3, d5, d7, d8, d10, d12, d14
yading@10 421 transpose16_4x4 q0, q1, q2, q3, q4, q5, q6, q7
yading@10 422
yading@10 423 swap4 d17, d19, d21, d31, d24, d26, d28, d22
yading@10 424 transpose16_4x4 q8, q9, q10, q15, q12, q13, q14, q11
yading@10 425
/*
 * lowpass_8.16 clobbers q0-q3, q8-q10 and q15, so those eight registers
 * are parked in the scratch buffer and fetched again pair by pair below.
 */
yading@10 426 vst1.8 {d30,d31}, [r4,:128]!
yading@10 427 vst1.8 {d6, d7}, [r4,:128]!
yading@10 428 vst1.8 {d20,d21}, [r4,:128]!
yading@10 429 vst1.8 {d4, d5}, [r4,:128]!
yading@10 430 vst1.8 {d18,d19}, [r4,:128]!
yading@10 431 vst1.8 {d2, d3}, [r4,:128]!
yading@10 432 vst1.8 {d16,d17}, [r4,:128]!
yading@10 433 vst1.8 {d0, d1}, [r4,:128]
yading@10 434
/* pass 2 on the register-resident pairs -> d8..d11 */
yading@10 435 lowpass_8.16 q4, q12, d8, d9, d24, d25, d8
yading@10 436 lowpass_8.16 q5, q13, d10, d11, d26, d27, d9
yading@10 437 lowpass_8.16 q6, q14, d12, d13, d28, d29, d10
yading@10 438 lowpass_8.16 q7, q11, d14, d15, d22, d23, d11
yading@10 439
/* pass 2 on the parked pairs, reloaded in reverse store order -> d12..d15 */
yading@10 440 vld1.8 {d16,d17}, [r4,:128], r12
yading@10 441 vld1.8 {d30,d31}, [r4,:128], r12
yading@10 442 lowpass_8.16 q8, q15, d16, d17, d30, d31, d12
yading@10 443 vld1.8 {d16,d17}, [r4,:128], r12
yading@10 444 vld1.8 {d30,d31}, [r4,:128], r12
yading@10 445 lowpass_8.16 q8, q15, d16, d17, d30, d31, d13
yading@10 446 vld1.8 {d16,d17}, [r4,:128], r12
yading@10 447 vld1.8 {d30,d31}, [r4,:128], r12
yading@10 448 lowpass_8.16 q8, q15, d16, d17, d30, d31, d14
yading@10 449 vld1.8 {d16,d17}, [r4,:128], r12
yading@10 450 vld1.8 {d30,d31}, [r4,:128]
yading@10 451 lowpass_8.16 q8, q15, d16, d17, d30, d31, d15
yading@10 452
yading@10 453 transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
yading@10 454
yading@10 455 bx lr
yading@10 456 endfunc
yading@10 457
/*
 * h264_qpel8_hv_lowpass type
 *
 * Emits <type>_h264_qpel8_hv_lowpass_neon (type = put or avg): runs
 * put_h264_qpel8_hv_lowpass_neon_top and writes its 8x8 result to dst.
 *
 * In:  r0 = dst (:64 alignment hint), r2 = dst stride
 *      r1 = src, r3 = src stride, r4 = scratch buffer (see the _top
 *      function for their exact use)
 * On return r0 has advanced by 8 rows and r1 by 12 rows.
 * lr is kept in r10 across the call, so r10 is clobbered.
 * For avg the result is rounding-averaged with the bytes already at dst.
 */
yading@10 458 .macro h264_qpel8_hv_lowpass type
yading@10 459 function \type\()_h264_qpel8_hv_lowpass_neon
yading@10 460 mov r10, lr
yading@10 461 bl put_h264_qpel8_hv_lowpass_neon_top
yading@10 462 .ifc \type,avg
yading@10 463 vld1.8 {d0}, [r0,:64], r2
yading@10 464 vrhadd.u8 d12, d12, d0
yading@10 465 vld1.8 {d1}, [r0,:64], r2
yading@10 466 vrhadd.u8 d13, d13, d1
yading@10 467 vld1.8 {d2}, [r0,:64], r2
yading@10 468 vrhadd.u8 d14, d14, d2
yading@10 469 vld1.8 {d3}, [r0,:64], r2
yading@10 470 vrhadd.u8 d15, d15, d3
yading@10 471 vld1.8 {d4}, [r0,:64], r2
yading@10 472 vrhadd.u8 d8, d8, d4
yading@10 473 vld1.8 {d5}, [r0,:64], r2
yading@10 474 vrhadd.u8 d9, d9, d5
yading@10 475 vld1.8 {d6}, [r0,:64], r2
yading@10 476 vrhadd.u8 d10, d10, d6
yading@10 477 vld1.8 {d7}, [r0,:64], r2
yading@10 478 vrhadd.u8 d11, d11, d7
yading@10 479 sub r0, r0, r2, lsl #3 /* rewind dst by the 8 rows just read */
yading@10 480 .endif
yading@10 481
/* output rows 0-3 are in d12-d15, rows 4-7 in d8-d11 */
yading@10 482 vst1.8 {d12}, [r0,:64], r2
yading@10 483 vst1.8 {d13}, [r0,:64], r2
yading@10 484 vst1.8 {d14}, [r0,:64], r2
yading@10 485 vst1.8 {d15}, [r0,:64], r2
yading@10 486 vst1.8 {d8}, [r0,:64], r2
yading@10 487 vst1.8 {d9}, [r0,:64], r2
yading@10 488 vst1.8 {d10}, [r0,:64], r2
yading@10 489 vst1.8 {d11}, [r0,:64], r2
yading@10 490
yading@10 491 mov lr, r10
yading@10 492 bx lr
yading@10 493 endfunc
yading@10 494 .endm
yading@10 495
yading@10 496 h264_qpel8_hv_lowpass put
yading@10 497 h264_qpel8_hv_lowpass avg
yading@10 498
/*
 * h264_qpel8_hv_lowpass_l2 type
 *
 * Emits <type>_h264_qpel8_hv_lowpass_l2_neon (type = put or avg): the
 * two-dimensional lowpass result is rounding-averaged with a second 8x8
 * source before being written to dst.
 *
 * In:  r0 = dst (:64 alignment hint), r3 = stride (used for dst here and
 *           for src inside the _top function)
 *      r1 = src, r4 = scratch buffer (see the _top function)
 *      r2 = second source: 64 contiguous bytes (8 rows of 8), read with
 *           :128 alignment hints; r2 is left pointing just past them
 * On return r0 has advanced by 8 rows and r1 by 12 rows.
 * lr is kept in r10 across the call, so r10 is clobbered.
 */
yading@10 499 .macro h264_qpel8_hv_lowpass_l2 type
yading@10 500 function \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 501 mov r10, lr
yading@10 502 bl put_h264_qpel8_hv_lowpass_neon_top
yading@10 503
/* q6, q7 hold filtered rows 0-3 and q4, q5 rows 4-7 */
yading@10 504 vld1.8 {d0, d1}, [r2,:128]!
yading@10 505 vld1.8 {d2, d3}, [r2,:128]!
yading@10 506 vrhadd.u8 q0, q0, q6
yading@10 507 vld1.8 {d4, d5}, [r2,:128]!
yading@10 508 vrhadd.u8 q1, q1, q7
yading@10 509 vld1.8 {d6, d7}, [r2,:128]!
yading@10 510 vrhadd.u8 q2, q2, q4
yading@10 511 vrhadd.u8 q3, q3, q5
yading@10 512 .ifc \type,avg
yading@10 513 vld1.8 {d16}, [r0,:64], r3
yading@10 514 vrhadd.u8 d0, d0, d16
yading@10 515 vld1.8 {d17}, [r0,:64], r3
yading@10 516 vrhadd.u8 d1, d1, d17
yading@10 517 vld1.8 {d18}, [r0,:64], r3
yading@10 518 vrhadd.u8 d2, d2, d18
yading@10 519 vld1.8 {d19}, [r0,:64], r3
yading@10 520 vrhadd.u8 d3, d3, d19
yading@10 521 vld1.8 {d20}, [r0,:64], r3
yading@10 522 vrhadd.u8 d4, d4, d20
yading@10 523 vld1.8 {d21}, [r0,:64], r3
yading@10 524 vrhadd.u8 d5, d5, d21
yading@10 525 vld1.8 {d22}, [r0,:64], r3
yading@10 526 vrhadd.u8 d6, d6, d22
yading@10 527 vld1.8 {d23}, [r0,:64], r3
yading@10 528 vrhadd.u8 d7, d7, d23
yading@10 529 sub r0, r0, r3, lsl #3 /* rewind dst by the 8 rows just read */
yading@10 530 .endif
yading@10 531 vst1.8 {d0}, [r0,:64], r3
yading@10 532 vst1.8 {d1}, [r0,:64], r3
yading@10 533 vst1.8 {d2}, [r0,:64], r3
yading@10 534 vst1.8 {d3}, [r0,:64], r3
yading@10 535 vst1.8 {d4}, [r0,:64], r3
yading@10 536 vst1.8 {d5}, [r0,:64], r3
yading@10 537 vst1.8 {d6}, [r0,:64], r3
yading@10 538 vst1.8 {d7}, [r0,:64], r3
yading@10 539
yading@10 540 mov lr, r10
yading@10 541 bx lr
yading@10 542 endfunc
yading@10 543 .endm
yading@10 544
yading@10 545 h264_qpel8_hv_lowpass_l2 put
yading@10 546 h264_qpel8_hv_lowpass_l2 avg
yading@10 547
/*
 * h264_qpel16_hv type
 *
 * Emits the 16x16 wrappers <type>_h264_qpel16_hv_lowpass_neon and
 * <type>_h264_qpel16_hv_lowpass_l2_neon (type = put or avg).  Both run
 * the matching 8x8 routine four times in the order top-left, bottom-left,
 * top-right, bottom-right; lr is kept in r9 (the 8x8 routines use r10
 * themselves) and the fourth call is a tail call.
 * Each 8x8 call leaves r0 advanced by 8 rows and r1 by 12 rows, which is
 * what the pointer corrections account for.
 */
yading@10 548 .macro h264_qpel16_hv type
/* In: r0 = dst, r2 = dst stride, r1 = src, r3 = src stride, r4 = scratch */
yading@10 549 function \type\()_h264_qpel16_hv_lowpass_neon
yading@10 550 mov r9, lr
yading@10 551 bl \type\()_h264_qpel8_hv_lowpass_neon
yading@10 552 sub r1, r1, r3, lsl #2
yading@10 553 bl \type\()_h264_qpel8_hv_lowpass_neon
yading@10 554 sub r1, r1, r3, lsl #4
yading@10 555 sub r1, r1, r3, lsl #2
yading@10 556 add r1, r1, #8
yading@10 557 sub r0, r0, r2, lsl #4
yading@10 558 add r0, r0, #8
yading@10 559 bl \type\()_h264_qpel8_hv_lowpass_neon
yading@10 560 sub r1, r1, r3, lsl #2
yading@10 561 mov lr, r9
yading@10 562 b \type\()_h264_qpel8_hv_lowpass_neon
yading@10 563 endfunc
yading@10 564
/*
 * In: r0 = dst, r1 = src, r3 = stride for both, r4 = scratch buffer.
 * The second source is taken from the 256 bytes directly below r4 and is
 * consumed 64 bytes per 8x8 call, i.e. it has to be in the packed layout
 * written by the *_lowpass_neon_packed functions.
 */
yading@10 565 function \type\()_h264_qpel16_hv_lowpass_l2_neon
yading@10 566 mov r9, lr
yading@10 567 sub r2, r4, #256
yading@10 568 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 569 sub r1, r1, r3, lsl #2
yading@10 570 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 571 sub r1, r1, r3, lsl #4
yading@10 572 sub r1, r1, r3, lsl #2
yading@10 573 add r1, r1, #8
yading@10 574 sub r0, r0, r3, lsl #4
yading@10 575 add r0, r0, #8
yading@10 576 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 577 sub r1, r1, r3, lsl #2
yading@10 578 mov lr, r9
yading@10 579 b \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 580 endfunc
yading@10 581 .endm
yading@10 582
yading@10 583 h264_qpel16_hv put
yading@10 584 h264_qpel16_hv avg
yading@10 585
/*
 * h264_qpel8 type
 *
 * Emits the exported 8x8 entry points ff_<type>_h264_qpel8_mcXY_neon for
 * X, Y in 0..3 (except mc00), type being put or avg.
 *
 * All entry points treat r0 as dst, r1 as src and r2 as the stride
 * (presumably the C-level arguments dst, src, stride - confirm against
 * the C prototypes).  They set up the register interface of the internal
 * lowpass routines and either tail-call them or call them inside a
 * vpush/vpop {d8-d15} pair.
 *
 * Several entry points only adjust a pointer and then branch to a label
 * inside another entry point (<type>_h264_qpel8_mc01 / mc11 / mc21 /
 * mc12).  Such a jump skips the target's push, so every jumping function
 * pushes exactly the same register list as its target first.
 *
 * Lines prefixed with A or T are presumably the ARM-mode and Thumb-mode
 * alternatives selected by macros from asm.S; both variants round sp down
 * to a multiple of 16 after saving the old sp in r11.
 */
yading@10 586 .macro h264_qpel8 type
/* mc10: horizontal lowpass averaged with the source pixels themselves */
yading@10 587 function ff_\type\()_h264_qpel8_mc10_neon, export=1
yading@10 588 lowpass_const r3
yading@10 589 mov r3, r1 /* second source = unfiltered src */
yading@10 590 sub r1, r1, #2 /* filter window starts 2 pixels to the left */
yading@10 591 mov r12, #8
yading@10 592 b \type\()_h264_qpel8_h_lowpass_l2_neon
yading@10 593 endfunc
yading@10 594
/* mc20: plain horizontal lowpass, dst stride = src stride */
yading@10 595 function ff_\type\()_h264_qpel8_mc20_neon, export=1
yading@10 596 lowpass_const r3
yading@10 597 sub r1, r1, #2
yading@10 598 mov r3, r2
yading@10 599 mov r12, #8
yading@10 600 b \type\()_h264_qpel8_h_lowpass_neon
yading@10 601 endfunc
yading@10 602
/* mc30: as mc10, but averaged with the source one pixel to the right */
yading@10 603 function ff_\type\()_h264_qpel8_mc30_neon, export=1
yading@10 604 lowpass_const r3
yading@10 605 add r3, r1, #1
yading@10 606 sub r1, r1, #2
yading@10 607 mov r12, #8
yading@10 608 b \type\()_h264_qpel8_h_lowpass_l2_neon
yading@10 609 endfunc
yading@10 610
/* mc01: vertical lowpass averaged with the source pixels themselves */
yading@10 611 function ff_\type\()_h264_qpel8_mc01_neon, export=1
yading@10 612 push {lr}
yading@10 613 mov r12, r1 /* second source = unfiltered src */
/* shared tail, also entered from mc03 with r12 already set */
yading@10 614 \type\()_h264_qpel8_mc01:
yading@10 615 lowpass_const r3
yading@10 616 mov r3, r2
yading@10 617 sub r1, r1, r2, lsl #1 /* filter window starts 2 rows above */
yading@10 618 vpush {d8-d15}
yading@10 619 bl \type\()_h264_qpel8_v_lowpass_l2_neon
yading@10 620 vpop {d8-d15}
yading@10 621 pop {pc}
yading@10 622 endfunc
yading@10 623
/*
 * mc11: horizontal lowpass into a 64-byte temporary on the stack, then
 * vertical lowpass averaged with that temporary.
 * The pushed r0/r1 are the dst and the src used for the vertical pass;
 * the live r1 at the label is the src used for the horizontal pass, which
 * is how mc31, mc13 and mc33 reuse this body with different offsets.
 */
yading@10 624 function ff_\type\()_h264_qpel8_mc11_neon, export=1
yading@10 625 push {r0, r1, r11, lr}
yading@10 626 \type\()_h264_qpel8_mc11:
yading@10 627 lowpass_const r3
yading@10 628 mov r11, sp /* r11 -> saved r0, r1 */
yading@10 629 A bic sp, sp, #15
yading@10 630 T bic r0, r11, #15
yading@10 631 T mov sp, r0
yading@10 632 sub sp, sp, #64 /* 8x8 temporary */
yading@10 633 mov r0, sp
yading@10 634 sub r1, r1, #2
yading@10 635 mov r3, #8 /* temporary is stored with stride 8 */
yading@10 636 mov r12, #8
yading@10 637 vpush {d8-d15}
yading@10 638 bl put_h264_qpel8_h_lowpass_neon
yading@10 639 ldrd r0, r1, [r11], #8 /* reload saved dst/src, r11 now past them */
yading@10 640 mov r3, r2
yading@10 641 add r12, sp, #64 /* temporary sits above the 64 bytes of vpush */
yading@10 642 sub r1, r1, r2, lsl #1
yading@10 643 mov r2, #8 /* stride of the second source */
yading@10 644 bl \type\()_h264_qpel8_v_lowpass_l2_neon
yading@10 645 vpop {d8-d15}
yading@10 646 mov sp, r11
yading@10 647 pop {r11, pc}
yading@10 648 endfunc
yading@10 649
/*
 * mc21: horizontal lowpass into a 64-byte temporary, then the
 * two-dimensional lowpass averaged with that temporary.
 * Stack area: 8*8 bytes for the temporary followed by 16*12 bytes of
 * scratch for the hv routine.  Also entered at the label from mc23.
 */
yading@10 650 function ff_\type\()_h264_qpel8_mc21_neon, export=1
yading@10 651 push {r0, r1, r4, r10, r11, lr}
yading@10 652 \type\()_h264_qpel8_mc21:
yading@10 653 lowpass_const r3
yading@10 654 mov r11, sp
yading@10 655 A bic sp, sp, #15
yading@10 656 T bic r0, r11, #15
yading@10 657 T mov sp, r0
yading@10 658 sub sp, sp, #(8*8+16*12)
yading@10 659 sub r1, r1, #2
yading@10 660 mov r3, #8
yading@10 661 mov r0, sp
yading@10 662 mov r12, #8
yading@10 663 vpush {d8-d15}
yading@10 664 bl put_h264_qpel8_h_lowpass_neon
yading@10 665 mov r4, r0 /* r0 ended just past the temporary: hv scratch */
yading@10 666 ldrd r0, r1, [r11], #8
yading@10 667 sub r1, r1, r2, lsl #1
yading@10 668 sub r1, r1, #2
yading@10 669 mov r3, r2
yading@10 670 sub r2, r4, #64 /* second source = the temporary */
yading@10 671 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 672 vpop {d8-d15}
yading@10 673 mov sp, r11
yading@10 674 pop {r4, r10, r11, pc}
yading@10 675 endfunc
yading@10 676
/* mc31: mc11 body with the vertical pass reading from src + 1 */
yading@10 677 function ff_\type\()_h264_qpel8_mc31_neon, export=1
yading@10 678 add r1, r1, #1
yading@10 679 push {r0, r1, r11, lr}
yading@10 680 sub r1, r1, #1
yading@10 681 b \type\()_h264_qpel8_mc11
yading@10 682 endfunc
yading@10 683
/* mc02: plain vertical lowpass, dst stride = src stride */
yading@10 684 function ff_\type\()_h264_qpel8_mc02_neon, export=1
yading@10 685 push {lr}
yading@10 686 lowpass_const r3
yading@10 687 sub r1, r1, r2, lsl #1
yading@10 688 mov r3, r2
yading@10 689 vpush {d8-d15}
yading@10 690 bl \type\()_h264_qpel8_v_lowpass_neon
yading@10 691 vpop {d8-d15}
yading@10 692 pop {pc}
yading@10 693 endfunc
yading@10 694
/*
 * mc12: vertical lowpass into a 64-byte temporary, then the
 * two-dimensional lowpass averaged with that temporary.
 * Same stack layout as mc21.  Also entered at the label from mc32.
 */
yading@10 695 function ff_\type\()_h264_qpel8_mc12_neon, export=1
yading@10 696 push {r0, r1, r4, r10, r11, lr}
yading@10 697 \type\()_h264_qpel8_mc12:
yading@10 698 lowpass_const r3
yading@10 699 mov r11, sp
yading@10 700 A bic sp, sp, #15
yading@10 701 T bic r0, r11, #15
yading@10 702 T mov sp, r0
yading@10 703 sub sp, sp, #(8*8+16*12)
yading@10 704 sub r1, r1, r2, lsl #1
yading@10 705 mov r3, r2 /* stride lives in r3 from here on */
yading@10 706 mov r2, #8
yading@10 707 mov r0, sp
yading@10 708 vpush {d8-d15}
yading@10 709 bl put_h264_qpel8_v_lowpass_neon
yading@10 710 mov r4, r0 /* r0 ended just past the temporary: hv scratch */
yading@10 711 ldrd r0, r1, [r11], #8
yading@10 712 sub r1, r1, r3, lsl #1
yading@10 713 sub r1, r1, #2
yading@10 714 sub r2, r4, #64 /* second source = the temporary */
yading@10 715 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
yading@10 716 vpop {d8-d15}
yading@10 717 mov sp, r11
yading@10 718 pop {r4, r10, r11, pc}
yading@10 719 endfunc
yading@10 720
/*
 * mc22: plain two-dimensional lowpass with 16*12 bytes of aligned stack
 * scratch in r4.  No lowpass_const here: the hv core loads the constants
 * itself.
 */
yading@10 721 function ff_\type\()_h264_qpel8_mc22_neon, export=1
yading@10 722 push {r4, r10, r11, lr}
yading@10 723 mov r11, sp
yading@10 724 A bic sp, sp, #15
yading@10 725 T bic r4, r11, #15
yading@10 726 T mov sp, r4
yading@10 727 sub r1, r1, r2, lsl #1
yading@10 728 sub r1, r1, #2
yading@10 729 mov r3, r2
yading@10 730 sub sp, sp, #(16*12)
yading@10 731 mov r4, sp
yading@10 732 vpush {d8-d15}
yading@10 733 bl \type\()_h264_qpel8_hv_lowpass_neon
yading@10 734 vpop {d8-d15}
yading@10 735 mov sp, r11
yading@10 736 pop {r4, r10, r11, pc}
yading@10 737 endfunc
yading@10 738
/* mc32: mc12 body with the vertical pass reading from src + 1 */
yading@10 739 function ff_\type\()_h264_qpel8_mc32_neon, export=1
yading@10 740 push {r0, r1, r4, r10, r11, lr}
yading@10 741 add r1, r1, #1
yading@10 742 b \type\()_h264_qpel8_mc12
yading@10 743 endfunc
yading@10 744
/* mc03: mc01 body averaged with the source one row further down */
yading@10 745 function ff_\type\()_h264_qpel8_mc03_neon, export=1
yading@10 746 push {lr}
yading@10 747 add r12, r1, r2
yading@10 748 b \type\()_h264_qpel8_mc01
yading@10 749 endfunc
yading@10 750
/* mc13: mc11 body with the horizontal pass reading one row further down */
yading@10 751 function ff_\type\()_h264_qpel8_mc13_neon, export=1
yading@10 752 push {r0, r1, r11, lr}
yading@10 753 add r1, r1, r2
yading@10 754 b \type\()_h264_qpel8_mc11
yading@10 755 endfunc
yading@10 756
/* mc23: mc21 body with the horizontal pass reading one row further down */
yading@10 757 function ff_\type\()_h264_qpel8_mc23_neon, export=1
yading@10 758 push {r0, r1, r4, r10, r11, lr}
yading@10 759 add r1, r1, r2
yading@10 760 b \type\()_h264_qpel8_mc21
yading@10 761 endfunc
yading@10 762
/*
 * mc33: mc11 body with the vertical pass reading from src + 1 and the
 * horizontal pass reading one row further down
 */
yading@10 763 function ff_\type\()_h264_qpel8_mc33_neon, export=1
yading@10 764 add r1, r1, #1
yading@10 765 push {r0, r1, r11, lr}
yading@10 766 add r1, r1, r2
yading@10 767 sub r1, r1, #1
yading@10 768 b \type\()_h264_qpel8_mc11
yading@10 769 endfunc
yading@10 770 .endm
yading@10 771
yading@10 772 h264_qpel8 put
yading@10 773 h264_qpel8 avg
yading@10 774
/*
 * h264_qpel16 type
 *
 * Emits the exported 16x16 entry points ff_<type>_h264_qpel16_mcXY_neon
 * for X, Y in 0..3 (except mc00), type being put or avg.  The structure
 * mirrors the 8x8 entry points, using the 16x16 lowpass routines.
 *
 * All entry points treat r0 as dst, r1 as src and r2 as the stride
 * (presumably the C-level arguments dst, src, stride - confirm against
 * the C prototypes).
 *
 * The 16x16 helpers keep lr in r4 (vertical and packed routines) or r9
 * (hv routines), and the 8x8 hv routines use r10, which is why these
 * registers appear in the push lists.  Entry points that branch to a
 * label inside another entry point push the same register list as their
 * target first.
 *
 * Lines prefixed with A or T are presumably the ARM-mode and Thumb-mode
 * alternatives selected by macros from asm.S; both variants round sp down
 * to a multiple of 16 after saving the old sp in r11.
 */
yading@10 775 .macro h264_qpel16 type
/* mc10: horizontal lowpass averaged with the source pixels themselves */
yading@10 776 function ff_\type\()_h264_qpel16_mc10_neon, export=1
yading@10 777 lowpass_const r3
yading@10 778 mov r3, r1 /* second source = unfiltered src */
yading@10 779 sub r1, r1, #2 /* filter window starts 2 pixels to the left */
yading@10 780 b \type\()_h264_qpel16_h_lowpass_l2_neon
yading@10 781 endfunc
yading@10 782
/* mc20: plain horizontal lowpass, dst stride = src stride */
yading@10 783 function ff_\type\()_h264_qpel16_mc20_neon, export=1
yading@10 784 lowpass_const r3
yading@10 785 sub r1, r1, #2
yading@10 786 mov r3, r2
yading@10 787 b \type\()_h264_qpel16_h_lowpass_neon
yading@10 788 endfunc
yading@10 789
/* mc30: as mc10, but averaged with the source one pixel to the right */
yading@10 790 function ff_\type\()_h264_qpel16_mc30_neon, export=1
yading@10 791 lowpass_const r3
yading@10 792 add r3, r1, #1
yading@10 793 sub r1, r1, #2
yading@10 794 b \type\()_h264_qpel16_h_lowpass_l2_neon
yading@10 795 endfunc
yading@10 796
/* mc01: vertical lowpass averaged with the source pixels themselves */
yading@10 797 function ff_\type\()_h264_qpel16_mc01_neon, export=1
yading@10 798 push {r4, lr}
yading@10 799 mov r12, r1 /* second source = unfiltered src */
/* shared tail, also entered from mc03 with r12 already set */
yading@10 800 \type\()_h264_qpel16_mc01:
yading@10 801 lowpass_const r3
yading@10 802 mov r3, r2
yading@10 803 sub r1, r1, r2, lsl #1 /* filter window starts 2 rows above */
yading@10 804 vpush {d8-d15}
yading@10 805 bl \type\()_h264_qpel16_v_lowpass_l2_neon
yading@10 806 vpop {d8-d15}
yading@10 807 pop {r4, pc}
yading@10 808 endfunc
yading@10 809
/*
 * mc11: horizontal lowpass into a 16x16 temporary (256 bytes, stride 16)
 * on the stack, then vertical lowpass averaged with that temporary.
 * The pushed r0/r1 are the dst and the src used for the vertical pass;
 * the live r1 at the label is the src used for the horizontal pass, which
 * is how mc31, mc13 and mc33 reuse this body with different offsets.
 */
yading@10 810 function ff_\type\()_h264_qpel16_mc11_neon, export=1
yading@10 811 push {r0, r1, r4, r11, lr}
yading@10 812 \type\()_h264_qpel16_mc11:
yading@10 813 lowpass_const r3
yading@10 814 mov r11, sp /* r11 -> saved r0, r1 */
yading@10 815 A bic sp, sp, #15
yading@10 816 T bic r0, r11, #15
yading@10 817 T mov sp, r0
yading@10 818 sub sp, sp, #256
yading@10 819 mov r0, sp
yading@10 820 sub r1, r1, #2
yading@10 821 mov r3, #16
yading@10 822 vpush {d8-d15}
yading@10 823 bl put_h264_qpel16_h_lowpass_neon
yading@10 824 ldrd r0, r1, [r11], #8 /* reload saved dst/src, r11 now past them */
yading@10 825 mov r3, r2
yading@10 826 add r12, sp, #64 /* temporary sits above the 64 bytes of vpush */
yading@10 827 sub r1, r1, r2, lsl #1
yading@10 828 mov r2, #16 /* stride of the second source */
yading@10 829 bl \type\()_h264_qpel16_v_lowpass_l2_neon
yading@10 830 vpop {d8-d15}
yading@10 831 mov sp, r11
yading@10 832 pop {r4, r11, pc}
yading@10 833 endfunc
yading@10 834
/*
 * mc21: packed horizontal lowpass into a 256-byte temporary, then the
 * two-dimensional lowpass averaged with that temporary.
 * Stack area: 16*16 bytes for the temporary followed by 16*12 bytes of
 * scratch for the hv routine, which finds the temporary at r4 - 256.
 * Also entered at the label from mc23.
 */
yading@10 835 function ff_\type\()_h264_qpel16_mc21_neon, export=1
yading@10 836 push {r0, r1, r4-r5, r9-r11, lr}
yading@10 837 \type\()_h264_qpel16_mc21:
yading@10 838 lowpass_const r3
yading@10 839 mov r11, sp
yading@10 840 A bic sp, sp, #15
yading@10 841 T bic r0, r11, #15
yading@10 842 T mov sp, r0
yading@10 843 sub sp, sp, #(16*16+16*12)
yading@10 844 sub r1, r1, #2
yading@10 845 mov r0, sp
yading@10 846 vpush {d8-d15}
yading@10 847 bl put_h264_qpel16_h_lowpass_neon_packed
yading@10 848 mov r4, r0 /* r0 ended just past the temporary: hv scratch */
yading@10 849 ldrd r0, r1, [r11], #8
yading@10 850 sub r1, r1, r2, lsl #1
yading@10 851 sub r1, r1, #2
yading@10 852 mov r3, r2
yading@10 853 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
yading@10 854 vpop {d8-d15}
yading@10 855 mov sp, r11
yading@10 856 pop {r4-r5, r9-r11, pc}
yading@10 857 endfunc
yading@10 858
/* mc31: mc11 body with the vertical pass reading from src + 1 */
yading@10 859 function ff_\type\()_h264_qpel16_mc31_neon, export=1
yading@10 860 add r1, r1, #1
yading@10 861 push {r0, r1, r4, r11, lr}
yading@10 862 sub r1, r1, #1
yading@10 863 b \type\()_h264_qpel16_mc11
yading@10 864 endfunc
yading@10 865
/* mc02: plain vertical lowpass, dst stride = src stride */
yading@10 866 function ff_\type\()_h264_qpel16_mc02_neon, export=1
yading@10 867 push {r4, lr}
yading@10 868 lowpass_const r3
yading@10 869 sub r1, r1, r2, lsl #1
yading@10 870 mov r3, r2
yading@10 871 vpush {d8-d15}
yading@10 872 bl \type\()_h264_qpel16_v_lowpass_neon
yading@10 873 vpop {d8-d15}
yading@10 874 pop {r4, pc}
yading@10 875 endfunc
yading@10 876
/*
 * mc12: packed vertical lowpass into a 256-byte temporary, then the
 * two-dimensional lowpass averaged with that temporary.
 * Same stack layout as mc21.  Also entered at the label from mc32.
 */
yading@10 877 function ff_\type\()_h264_qpel16_mc12_neon, export=1
yading@10 878 push {r0, r1, r4-r5, r9-r11, lr}
yading@10 879 \type\()_h264_qpel16_mc12:
yading@10 880 lowpass_const r3
yading@10 881 mov r11, sp
yading@10 882 A bic sp, sp, #15
yading@10 883 T bic r0, r11, #15
yading@10 884 T mov sp, r0
yading@10 885 sub sp, sp, #(16*16+16*12)
yading@10 886 sub r1, r1, r2, lsl #1
yading@10 887 mov r0, sp
yading@10 888 mov r3, r2 /* stride lives in r3; the packed routine sets r2 = 8 */
yading@10 889 vpush {d8-d15}
yading@10 890 bl put_h264_qpel16_v_lowpass_neon_packed
yading@10 891 mov r4, r0 /* r0 ended just past the temporary: hv scratch */
yading@10 892 ldrd r0, r1, [r11], #8
yading@10 893 sub r1, r1, r3, lsl #1
yading@10 894 sub r1, r1, #2
yading@10 895 mov r2, r3
yading@10 896 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
yading@10 897 vpop {d8-d15}
yading@10 898 mov sp, r11
yading@10 899 pop {r4-r5, r9-r11, pc}
yading@10 900 endfunc
yading@10 901
/* mc22: plain two-dimensional lowpass, 16*12 bytes of aligned scratch in r4 */
yading@10 902 function ff_\type\()_h264_qpel16_mc22_neon, export=1
yading@10 903 push {r4, r9-r11, lr}
yading@10 904 lowpass_const r3
yading@10 905 mov r11, sp
yading@10 906 A bic sp, sp, #15
yading@10 907 T bic r4, r11, #15
yading@10 908 T mov sp, r4
yading@10 909 sub r1, r1, r2, lsl #1
yading@10 910 sub r1, r1, #2
yading@10 911 mov r3, r2
yading@10 912 sub sp, sp, #(16*12)
yading@10 913 mov r4, sp
yading@10 914 vpush {d8-d15}
yading@10 915 bl \type\()_h264_qpel16_hv_lowpass_neon
yading@10 916 vpop {d8-d15}
yading@10 917 mov sp, r11
yading@10 918 pop {r4, r9-r11, pc}
yading@10 919 endfunc
yading@10 920
/* mc32: mc12 body with the vertical pass reading from src + 1 */
yading@10 921 function ff_\type\()_h264_qpel16_mc32_neon, export=1
yading@10 922 push {r0, r1, r4-r5, r9-r11, lr}
yading@10 923 add r1, r1, #1
yading@10 924 b \type\()_h264_qpel16_mc12
yading@10 925 endfunc
yading@10 926
/* mc03: mc01 body averaged with the source one row further down */
yading@10 927 function ff_\type\()_h264_qpel16_mc03_neon, export=1
yading@10 928 push {r4, lr}
yading@10 929 add r12, r1, r2
yading@10 930 b \type\()_h264_qpel16_mc01
yading@10 931 endfunc
yading@10 932
/* mc13: mc11 body with the horizontal pass reading one row further down */
yading@10 933 function ff_\type\()_h264_qpel16_mc13_neon, export=1
yading@10 934 push {r0, r1, r4, r11, lr}
yading@10 935 add r1, r1, r2
yading@10 936 b \type\()_h264_qpel16_mc11
yading@10 937 endfunc
yading@10 938
/* mc23: mc21 body with the horizontal pass reading one row further down */
yading@10 939 function ff_\type\()_h264_qpel16_mc23_neon, export=1
yading@10 940 push {r0, r1, r4-r5, r9-r11, lr}
yading@10 941 add r1, r1, r2
yading@10 942 b \type\()_h264_qpel16_mc21
yading@10 943 endfunc
yading@10 944
/*
 * mc33: mc11 body with the vertical pass reading from src + 1 and the
 * horizontal pass reading one row further down
 */
yading@10 945 function ff_\type\()_h264_qpel16_mc33_neon, export=1
yading@10 946 add r1, r1, #1
yading@10 947 push {r0, r1, r4, r11, lr}
yading@10 948 add r1, r1, r2
yading@10 949 sub r1, r1, #1
yading@10 950 b \type\()_h264_qpel16_mc11
yading@10 951 endfunc
yading@10 952 .endm
yading@10 953
yading@10 954 h264_qpel16 put
yading@10 955 h264_qpel16 avg