/*
 * Copyright (c) 2010 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"
#include "asm-offsets.h"

/*
 * ff_dct_unquantize_h263_inter_neon
 *
 * Entry stub that derives the arguments of ff_dct_unquantize_h263_neon
 * from a codec context and then FALLS THROUGH into that routine: there is
 * no return or branch instruction in this body, so the function that
 * follows must stay immediately after it in this file.
 * NOTE(review): this relies on the function/endfunc macros emitting no
 * code between the two bodies - confirm against libavutil/arm/asm.S.
 *
 * In:   r0 = context base address (indexed with the asm-offsets.h constants)
 *       r1 = pointer to the int16 coefficient block (passed on untouched)
 *       r2 = block index n (selects a 32-bit entry at BLOCK_LAST_INDEX)
 *       r3 = quantiser scale
 *       (presumably the C prototype is
 *        void f(MpegEncContext *s, int16_t *block, int n, int qscale)
 *        - verify against the caller)
 * Out:  r0 = qmul  = r3 << 1
 *       r2 = qadd  = (r3 - 1) | 1
 *       r3 = count = raster_end[block_last_index[n]] + 1
 * Clobbers: r12
 */
function ff_dct_unquantize_h263_inter_neon, export=1
        add             r12, r0,  #BLOCK_LAST_INDEX
        ldr             r12, [r12, r2, lsl #2]  @ r12 = 32-bit table entry n
        add             r0,  r0,  #INTER_SCANTAB_RASTER_END
        ldrb            r12, [r0, r12]          @ r12 = byte table entry [r12]
        sub             r2,  r3,  #1
        lsl             r0,  r3,  #1            @ r0 = qmul
        orr             r2,  r2,  #1            @ r2 = qadd, forced odd
        add             r3,  r12, #1            @ r3 = number of coefficients
endfunc

/*
 * ff_dct_unquantize_h263_neon
 *
 * In-place dequantisation of signed 16-bit coefficients.  For every lane:
 *
 *     x == 0 :  x stays 0
 *     x <  0 :  x = x * qmul - qadd
 *     x >  0 :  x = x * qmul + qadd
 *
 * In:   r0 = qmul   (low 16 bits are broadcast)
 *       r1 = pointer to the coefficients; accessed with :128 alignment
 *            hints in the main loop and :64 in the tail
 *       r2 = qadd   (low 16 bits are broadcast)
 *       r3 = number of coefficients to process
 *
 * The count is rounded UP to the granularity of the code paths, so more
 * lanes than requested may be read and rewritten:
 *   - count <= 4 on entry             : one 4-lane pass
 *   - otherwise 16 lanes per loop pass; after each pass
 *       remaining <= 0                : return
 *       remaining >  8                : another 16-lane pass
 *       remaining 1..8                : one 4-lane pass
 * NOTE(review): a remainder of 5..8 after the first loop pass only gets
 * 4 lanes processed.  Presumably the counts produced by the callers never
 * leave such a remainder - confirm before reusing this routine elsewhere.
 *
 * Clobbers: r0, r1, r3, flags, q0-q3, q8-q11, q13-q15.
 * r2, r4-r12 and q4-q7 are not touched.
 */
function ff_dct_unquantize_h263_neon, export=1
        vdup.16         q15, r0                 @ qmul
        vdup.16         q14, r2                 @ qadd
        vneg.s16        q13, q14                @ -qadd
        cmp             r3,  #4
        mov             r0,  r1                 @ r0 = read ptr, r1 = write ptr
        ble             2f
1:
        @ Two interleaved 8-lane streams: q0-q3 and q8-q11.
        vld1.16         {q0},     [r0,:128]!
        vclt.s16        q3,  q0,  #0            @ mask: lane is negative
        vld1.16         {q8},     [r0,:128]!
        vceq.s16        q1,  q0,  #0            @ mask: lane is zero
        vmul.s16        q2,  q0,  q15
        vclt.s16        q11, q8,  #0
        vmul.s16        q10, q8,  q15
        vbsl            q3,  q13, q14           @ negative ? -qadd : qadd
        vbsl            q11, q13, q14
        vadd.s16        q2,  q2,  q3
        vceq.s16        q9,  q8,  #0
        vadd.s16        q10, q10, q11
        vbif            q0,  q2,  q1            @ take result where lane != 0
        vbif            q8,  q10, q9
        subs            r3,  r3,  #16
        vst1.16         {q0},     [r1,:128]!
        vst1.16         {q8},     [r1,:128]!
        it              le
        bxle            lr                      @ nothing left
        cmp             r3,  #8
        bgt             1b
2:
        @ 4-lane tail.  r0 == r1 here on both paths into this label.
        @ d30, d26, d28 are the low halves of q15, q13, q14.
        vld1.16         {d0},     [r0,:64]
        vclt.s16        d3,  d0,  #0
        vceq.s16        d1,  d0,  #0
        vmul.s16        d2,  d0,  d30
        vbsl            d3,  d26, d28
        vadd.s16        d2,  d2,  d3
        vbif            d0,  d2,  d1
        vst1.16         {d0},     [r1,:64]
        bx              lr
endfunc

/*
 * ff_dct_unquantize_h263_intra_neon
 *
 * Intra variant: computes the arguments for ff_dct_unquantize_h263_neon,
 * calls it on the whole block, and afterwards overwrites coefficient 0
 * with a separately computed value (the common routine rewrites
 * coefficient 0 as well, so it is saved beforehand and restored last).
 *
 * In:   r0 = context base address (indexed with the asm-offsets.h constants)
 *       r1 = pointer to the int16 coefficient block
 *       r2 = block index n
 *       r3 = quantiser scale
 *       (presumably the same C prototype as the inter variant - verify)
 *
 * Coefficient count:
 *       word at AC_PRED != 0 : 63 + 1
 *       otherwise            : raster_end[block_last_index[n]] + 1
 * Word at H263_AIC != 0 : qadd = 0 and coefficient 0 is written back
 *                         unchanged.
 * Word at H263_AIC == 0 : qadd = (r3 - 1) | 1 and coefficient 0 is
 *                         multiplied by the scale word at Y_DC_SCALE
 *                         (n < 4) or at Y_DC_SCALE + 4 (n >= 4).
 *
 * r4-r6 and lr are saved on the stack; r4 and r5 stay live across the
 * call, which is safe because the callee does not modify r4-r12.
 */
function ff_dct_unquantize_h263_intra_neon, export=1
        push            {r4-r6,lr}
        add             r12, r0,  #BLOCK_LAST_INDEX
        ldr             r6,  [r0, #AC_PRED]
        add             lr,  r0,  #INTER_SCANTAB_RASTER_END
        cmp             r6,  #0
        it              ne
        movne           r12, #63                @ process all 64 coefficients
        bne             1f
        ldr             r12, [r12, r2, lsl #2]  @ r12 = 32-bit table entry n
        ldrb            r12, [lr, r12]          @ r12 = byte table entry [r12]
1:      ldr             r5,  [r0, #H263_AIC]
        ldrsh           r4,  [r1]               @ r4 = coefficient 0, signed
        cmp             r5,  #0
        mov             r5,  r1                 @ keep block ptr; callee moves r1
        it              ne
        movne           r2,  #0                 @ qadd = 0
        bne             2f
        cmp             r2,  #4
        it              ge
        addge           r0,  r0,  #4            @ n >= 4: use the next scale word
                                                @ (presumably the chroma scale
                                                @ - verify struct layout)
        sub             r2,  r3,  #1
        ldr             r6,  [r0, #Y_DC_SCALE]
        orr             r2,  r2,  #1            @ r2 = qadd, forced odd
        smulbb          r4,  r4,  r6            @ r4 = coefficient 0 * scale
2:      lsl             r0,  r3,  #1            @ r0 = qmul
        add             r3,  r12, #1            @ r3 = number of coefficients
        bl              ff_dct_unquantize_h263_neon
        vmov.16         d0[0], r4
        vst1.16         {d0[0]},  [r5]          @ coefficient 0 = low 16 bits of r4
        pop             {r4-r6,pc}
endfunc