@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

#include "config.h"
#include "libavutil/arm/asm.S"

#if !HAVE_ARMV5TE_EXTERNAL
#define pld @
#endif

        .align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@
@ Adds an 8x8 block of signed 16-bit values to an 8x8 block of unsigned
@ 8-bit pixels, saturating every result to the range 0..255.
@
@ In:    r0 = block  (64 consecutive int16_t, read 16 bytes per row)
@        r1 = dest   (8 pixels per row, read and written as two 32-bit words)
@        r2 = stride (byte distance between rows of dest)
@ Out:   none
@ Regs:  r4     = current 32-bit word of dest (four packed pixels)
@        r5, r7 = coefficients, then their bitwise complement
@        r6, r8 = pixel + coefficient sums, then the clamped pixels
@        r9     = packed result word
@        r10    = remaining row count (8 down to 0)
@ Clobb: r0, r1, flags; r4-r10 are saved and restored on the stack.
@
@ Clamping: the sum is compared as an unsigned value against 255, so a
@ negative sum (huge when seen as unsigned) and a sum above 255 both take
@ the saturation path for any int16_t coefficient.  The saturated value is
@ (~coeff) >> 24 (logical): 0x00 when the coefficient is negative, 0xFF
@ otherwise.  Because a pixel is never negative, a negative sum implies a
@ negative coefficient and a sum above 255 implies a positive one.
@
@ NOTE(review): dest is accessed with word loads and stores and bits 7:0
@ of each word are paired with the lowest-addressed coefficient, so this
@ presumably expects a 4-byte aligned dest and little-endian data; confirm
@ against the callers.
function ff_add_pixels_clamped_arm, export=1
        push            {r4-r10}
        mov             r10, #8                 /* 8 rows to process */
1:
        ldr             r4,  [r1]               /* load dest, pixels 0-3 */
        /* block[0] and block[1] */
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5                 /* ~coeff: bits 31:24 give the clamp value */
        mvn             r7,  r7
        cmp             r6,  #255               /* unsigned: catches sum < 0 and sum > 255 */
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8,  lsl #24
        /* store dest, pixels 0-3 */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest, pixels 4-7 */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        add             r0,  r0,  #16           /* moved from [E]: next row of block */
        orr             r9,  r9,  r8,  lsl #24
        /* moved from [F]: sets the flags for the bne below; nothing in between alters them */
        subs            r10, r10, #1
        /* store dest, pixels 4-7 */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2            /* next row of dest */
        bne             1b

        pop             {r4-r10}
        bx              lr
endfunc