/*
 * Copyright (c) 2011 Mans Rullgard
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

@ Discards its arguments; used as the default for sum8's rsb parameter
@ so that the negation instructions expand to nothing.
.macro skip args:vararg
.endm

@ Multiply-accumulate eight window taps (64 words apart, starting at
@ \w + \offs words) with eight synth_buf samples (also 64 words apart)
@ into the 64-bit pair \lo:\hi.  Passing rsb for \rsb negates each
@ window tap first, turning the accumulation into a subtraction.
@ \p is advanced by one word.
.macro sum8 lo, hi, w, p, t1, t2, t3, t4, rsb=skip, offs=0
        ldr             \t1, [\w, #4*\offs]
        ldr             \t2, [\p, #4]!
        \rsb            \t1, \t1, #0
        .irpc i, 135
        ldr             \t3, [\w, #4*64*\i+4*\offs]
        ldr             \t4, [\p, #4*64*\i]
        smlal           \lo, \hi, \t1, \t2
        \rsb            \t3, \t3, #0
        ldr             \t1, [\w, #4*64*(\i+1)+4*\offs]
        ldr             \t2, [\p, #4*64*(\i+1)]
        smlal           \lo, \hi, \t3, \t4
        \rsb            \t1, \t1, #0
        .endr
        ldr             \t3, [\w, #4*64*7+4*\offs]
        ldr             \t4, [\p, #4*64*7]
        smlal           \lo, \hi, \t1, \t2
        \rsb            \t3, \t3, #0
        smlal           \lo, \hi, \t3, \t4
.endm

@ Convert the 64-bit accumulator \lo:\hi (24 fractional bits) into a
@ saturated 16-bit sample in \rd.  The fractional remainder is kept in
@ \lo as error feedback for the next sample; \hi is cleared.
.macro round rd, lo, hi
        lsr             \rd, \lo, #24
        bic             \lo, \lo, #0xff000000
        orr             \rd, \rd, \hi, lsl #8
        mov             \hi, #0
        ssat            \rd, #16, \rd
.endm

@ void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf,
@                                         int32_t *window,
@                                         int *dither_state,
@                                         int16_t *samples, int incr)
function ff_mpadsp_apply_window_fixed_armv6, export=1
        push            {r2,r4-r11,lr}

        add             r4,  r0,  #4*512        @ synth_buf + 512
        .rept 4                                 @ copy the first 32 samples
        ldm             r0!, {r5-r12}           @ to the end of the buffer
        stm             r4!, {r5-r12}           @ to avoid wrap-around
        .endr

        ldr             r4,  [sp, #40]          @ incr
        sub             r0,  r0,  #4*17         @ synth_buf + 16
        ldr             r8,  [r2]               @ sum:low
        add             r2,  r0,  #4*32         @ synth_buf + 48
        rsb             r5,  r4,  r4,  lsl #5   @ 31 * incr
        lsl             r4,  r4,  #1
        asr             r9,  r8,  #31           @ sum:high
        add             r5,  r3,  r5,  lsl #1   @ samples2
        add             r6,  r1,  #4*32         @ w2
        str             r4,  [sp, #40]

        sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr
        sum8            r8,  r9,  r1,  r2,  r10, r11, r12, lr,  rsb, 32
        round           r10, r8,  r9
        strh_post       r10, r3,  r4            @ store sample 0

        mov             lr,  #15                @ loop count: 15 sample pairs
1:
        ldr             r12, [r0, #4]!
        ldr             r11, [r6, #-4]!
        ldr             r10, [r1, #4]!
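        @ Each pass of the loop below computes two output samples at
        @ once, sharing the synth_buf loads between them: r8:r9
        @ accumulates the forward sample (stored through r3/samples)
        @ and r4:r7 the mirrored one (stored through r5/samples2),
        @ like the sum/sum2 pair in the generic C version.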
        .irpc i, 0246
        .if \i
        ldr             r11, [r6, #4*64*\i]
        ldr             r10, [r1, #4*64*\i]
        .endif
        rsb             r11, r11, #0
        smlal           r8,  r9,  r10, r12
        ldr             r10, [r0, #4*64*(\i+1)]
        .ifeq \i
        smull           r4,  r7,  r11, r12
        .else
        smlal           r4,  r7,  r11, r12
        .endif
        ldr             r11, [r6, #4*64*(\i+1)]
        ldr             r12, [r1, #4*64*(\i+1)]
        rsb             r11, r11, #0
        smlal           r8,  r9,  r12, r10
        .iflt \i-6
        ldr             r12, [r0, #4*64*(\i+2)]
        .else
        ldr             r12, [r2, #-4]!         @ synth_buf + 48 - j
        .endif
        smlal           r4,  r7,  r11, r10
        .endr
        @ Second half of the window: both sums subtract here, so the
        @ synth_buf sample is negated once and used for both.
        .irpc i, 0246
        ldr             r10, [r1, #4*64*\i+4*32]
        rsb             r12, r12, #0
        ldr             r11, [r6, #4*64*\i+4*32]
        smlal           r8,  r9,  r10, r12
        ldr             r10, [r2, #4*64*(\i+1)]
        smlal           r4,  r7,  r11, r12
        ldr             r12, [r1, #4*64*(\i+1)+4*32]
        rsb             r10, r10, #0
        ldr             r11, [r6, #4*64*(\i+1)+4*32]
        smlal           r8,  r9,  r12, r10
        .iflt \i-6
        ldr             r12, [r2, #4*64*(\i+2)]
        .else
        ldr             r12, [sp, #40]          @ 2 * incr
        .endif
        smlal           r4,  r7,  r11, r10
        .endr
        round           r10, r8,  r9
        adds            r8,  r8,  r4            @ sum += sum2
        adc             r9,  r9,  r7
        strh_post       r10, r3,  r12           @ *samples, samples += incr
        round           r11, r8,  r9
        subs            lr,  lr,  #1
        strh_dpost      r11, r5,  r12           @ *samples2, samples2 -= incr
        bgt             1b

        sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr,  rsb, 33
        pop             {r4}                    @ dither_state
        round           r10, r8,  r9
        str             r8,  [r4]               @ save fractional remainder
        strh            r10, [r3]               @ store middle sample (16)

        pop             {r4-r11,pc}
endfunc
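
/* For readers less at home in ARM assembly, the routine above follows
 * the generic fixed-point synthesis windowing of
 * libavcodec/mpegaudiodsp_template.c.  The condensed C model below is
 * an illustrative sketch only, not part of the build; the helper names
 * sum8_c, round_sample_c and apply_window_fixed_c are invented here.
 *
 *     #include <string.h>
 *     #include "libavutil/common.h"
 *
 *     // dot product of 8 window taps and 8 samples, both 64 words apart
 *     static int64_t sum8_c(const int32_t *w, const int32_t *p)
 *     {
 *         int64_t s = 0;
 *         for (int i = 0; i < 8; i++)
 *             s += (int64_t)w[64 * i] * p[64 * i];
 *         return s;
 *     }
 *
 *     static int16_t round_sample_c(int64_t *sum)
 *     {
 *         int v = (int)(*sum >> 24);     // drop the 24 fractional bits
 *         *sum &= (1 << 24) - 1;         // remainder feeds the next sample
 *         return av_clip_int16(v);
 *     }
 *
 *     static void apply_window_fixed_c(int32_t *synth_buf, int32_t *window,
 *                                      int *dither_state, int16_t *samples,
 *                                      int incr)
 *     {
 *         memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
 *
 *         int64_t sum = *dither_state;
 *         sum += sum8_c(window,      synth_buf + 16);
 *         sum -= sum8_c(window + 32, synth_buf + 48);
 *         samples[0] = round_sample_c(&sum);
 *
 *         for (int j = 1; j < 16; j++) {      // two samples per iteration
 *             int64_t sum2 = 0;
 *             const int32_t *w  = window + j;
 *             const int32_t *w2 = window + 32 - j;
 *             sum  += sum8_c(w,       synth_buf + 16 + j);
 *             sum2 -= sum8_c(w2,      synth_buf + 16 + j);
 *             sum  -= sum8_c(w  + 32, synth_buf + 48 - j);
 *             sum2 -= sum8_c(w2 + 32, synth_buf + 48 - j);
 *             samples[j * incr] = round_sample_c(&sum);
 *             sum += sum2;
 *             samples[(32 - j) * incr] = round_sample_c(&sum);
 *         }
 *
 *         sum -= sum8_c(window + 48, synth_buf + 32);
 *         samples[16 * incr] = round_sample_c(&sum);
 *         *dither_state = (int)sum;
 *     }
 */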