/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * ff_vorbis_inverse_coupling_neon
 *
 * Combines two float arrays in place, one 128-bit vector (four 32-bit
 * lanes) at a time.
 *
 * In:    r0 = first array  (read and written; presumably the Vorbis
 *                           magnitude vector -- confirm against the
 *                           C prototype)
 *        r1 = second array (read and written; presumably the Vorbis
 *                           angle vector -- confirm against the
 *                           C prototype)
 *        r2 = number of elements in each array
 * Out:   both arrays updated in place, no return value
 * Clobb: r0-r3, r12, q0-q3, q8-q12, flags
 *
 * Requirements visible from the code:
 *   - both pointers must be 16-byte aligned (every vld1/vst1 uses the
 *     :128 alignment qualifier);
 *   - only whole vectors are processed and there is no scalar tail, so
 *     the count is expected to be a multiple of 4;
 *   - a count below 4 returns immediately without touching memory.
 *
 * Per lane, with a = first[i], b = second[i] as loaded:
 *     s = a AND 0x80000000             sign bit of a
 *     t = b XOR s                      b with its sign flipped if a is negative
 *     m = (b as signed int) <= 0       all-ones / all-zeros lane mask
 *     first[i]  = a + (t AND m)        a + t where m is set, else a + 0
 *     second[i] = a - (t AND NOT m)    a - t where m is clear, else a - 0
 *
 * The main loop is software pipelined with two register sets in flight:
 *     set A: q11 = first,  q12 = second
 *     set B: q0  = first,  q1  = second
 * The stores of one set are interleaved with the arithmetic of the other.
 * r0/r1 are the read pointers; r3/r12 are the write pointers that trail
 * them over the same arrays.
 */
function ff_vorbis_inverse_coupling_neon, export=1
        vmov.i32        q10, #1<<31             @ q10 = sign-bit mask in every lane
        subs            r2,  r2,  #4            @ r2 = count - 4
        it              lt
        bxlt            lr                      @ fewer than 4 elements: nothing to do
        mov             r3,  r0                 @ r3  = write pointer, first array
        mov             r12, r1                 @ r12 = write pointer, second array
        beq             3f                      @ exactly one vector: tail code only

        @ Pipeline prologue: load and compute set A.
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        vcle.s32        q8,  q12, #0            @ m = (b <= 0), integer compare
        vand            q9,  q11, q10           @ s = sign bit of a
        veor            q12, q12, q9            @ t = b ^ s
        vand            q2,  q12, q8            @ t where m set, else 0
        vbic            q3,  q12, q8            @ t where m clear, else 0
        vadd.f32        q12, q11, q2            @ new first  = a + (t & m)
        vsub.f32        q11, q11, q3            @ new second = a - (t & ~m)

        @ Loop head: load and compute set B while storing set A.
1:      vld1.32         {d2-d3},  [r1,:128]!
        vld1.32         {d0-d1},  [r0,:128]!
        vcle.s32        q8,  q1,  #0
        vand            q9,  q0,  q10
        veor            q1,  q1,  q9
        vst1.32         {d24-d25},[r3, :128]!   @ store set A, first array
        vst1.32         {d22-d23},[r12,:128]!   @ store set A, second array
        vand            q2,  q1,  q8
        vbic            q3,  q1,  q8
        vadd.f32        q1,  q0,  q2
        vsub.f32        q0,  q0,  q3
        subs            r2,  r2,  #8            @ two more vectors accounted for
        ble             2f                      @ at most one vector left to load

        @ Second half: load and compute set A while storing set B.
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        vcle.s32        q8,  q12, #0
        vand            q9,  q11, q10
        veor            q12, q12, q9
        vst1.32         {d2-d3},  [r3, :128]!   @ store set B, first array
        vst1.32         {d0-d1},  [r12,:128]!   @ store set B, second array
        vand            q2,  q12, q8
        vbic            q3,  q12, q8
        vadd.f32        q12, q11, q2
        vsub.f32        q11, q11, q3
        b               1b

        @ Pipeline drain: set B is still pending.  The flags are those of the
        @ subs above (vst1 does not alter them): lt means every vector has
        @ been handled, eq means exactly one vector is left for the tail.
2:      vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        it              lt
        bxlt            lr

        @ Tail: a single vector, not pipelined.  The loads use r0/r1 without
        @ writeback, so the stores can go back through the same registers;
        @ r3/r12 hold the same addresses here and are not needed.
3:      vld1.32         {d2-d3},  [r1,:128]
        vld1.32         {d0-d1},  [r0,:128]
        vcle.s32        q8,  q1,  #0
        vand            q9,  q0,  q10
        veor            q1,  q1,  q9
        vand            q2,  q1,  q8
        vbic            q3,  q1,  q8
        vadd.f32        q1,  q0,  q2
        vsub.f32        q0,  q0,  q3
        vst1.32         {d2-d3},  [r0,:128]!
        vst1.32         {d0-d1},  [r1,:128]!
        bx              lr
endfunc