yading@10
|
1 /*
|
yading@10
|
2 * ARM NEON optimised DSP functions
|
yading@10
|
3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
yading@10
|
4 *
|
yading@10
|
5 * This file is part of FFmpeg.
|
yading@10
|
6 *
|
yading@10
|
7 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
8 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
9 * License as published by the Free Software Foundation; either
|
yading@10
|
10 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
11 *
|
yading@10
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
15 * Lesser General Public License for more details.
|
yading@10
|
16 *
|
yading@10
|
17 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
18 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
20 */
|
yading@10
|
21
|
yading@10
|
22 #include "libavutil/arm/asm.S"
|
yading@10
|
23
|
yading@10
|
/*
 * ff_vorbis_inverse_coupling_neon
 *
 * Register use, as read from the code below:
 *   r0       first float array, read and rewritten in place
 *   r1       second float array, read and rewritten in place
 *   r2       element count
 *   r3, r12  store cursors trailing the r0 / r1 load cursors
 *   q10      0x80000000 in every lane (the float sign bit)
 *   q8, q9, q2, q3  scratch
 *
 * NOTE(review): r0/r1/r2 are presumably the (mag, ang, blocksize)
 * arguments of the C prototype - confirm against the caller.
 *
 * Per 32-bit lane, with a = r0[i] and b = r1[i]:
 *   m     = (b, read as a signed integer, <= 0) ? all-ones : 0
 *   t     = b ^ (a & 0x80000000)
 *   r0[i] = a + (t &  m)
 *   r1[i] = a - (t & ~m)
 *
 * Every load and store carries the :128 qualifier, so both pointers
 * must be 16-byte aligned.  The count is consumed 4 lanes up front and
 * then 8 lanes per loop pass.
 * NOTE(review): the count appears to be expected to be a positive
 * multiple of 4; a count of 0 is not special-cased - confirm callers
 * never pass it.
 */

@ First half of the per-vector computation.
@   vb: lanes loaded from the r1 array, va: lanes loaded from the r0 array
@ Leaves the lane mask in q8 and the sign-adjusted vb in place.
.macro  couple_fold     vb,  va
        vcle.s32        q8,  \vb, #0            @ q8 = mask of lanes with vb <= 0
        vand            q9,  \va, q10           @ q9 = sign bit of va
        veor            \vb, \vb, q9            @ flip vb where va is negative
.endm

@ Second half of the per-vector computation; needs q8 from couple_fold.
@ On exit vb holds the value for the r0 array and va the value for the
@ r1 array.
.macro  couple_mix      vb,  va
        vand            q2,  \vb, q8            @ vb in masked lanes, else 0
        vbic            q3,  \vb, q8            @ vb in unmasked lanes, else 0
        vadd.f32        \vb, \va, q2            @ new r0-array value
        vsub.f32        \va, \va, q3            @ new r1-array value
.endm

function ff_vorbis_inverse_coupling_neon, export=1
        vmov.i32        q10, #1<<31
        subs            r2,  r2,  #4
        mov             r3,  r0                 @ store cursor for the r0 array
        mov             r12, r1                 @ store cursor for the r1 array
        beq             3f                      @ exactly one vector: tail only

        @ Pipeline prologue: compute the first vector into q12/q11.
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        couple_fold     q12, q11
        couple_mix      q12, q11

        @ Loop top: q12/q11 hold finished results that are not stored yet.
1:      vld1.32         {d2-d3},  [r1,:128]!
        vld1.32         {d0-d1},  [r0,:128]!
        couple_fold     q1,  q0
        vst1.32         {d24-d25},[r3, :128]!   @ store previous results
        vst1.32         {d22-d23},[r12,:128]!
        couple_mix      q1,  q0
        subs            r2,  r2,  #8
        ble             2f                      @ at most one vector left to load

        @ Second half of the pass: same work with the register sets swapped.
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        couple_fold     q12, q11
        vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        couple_mix      q12, q11
        b               1b

        @ Drain: store the results still held in q1/q0.
2:      vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        it              lt
        bxlt            lr                      @ count went negative: all done

        @ Tail: one last vector, handled in place through r0/r1.
3:      vld1.32         {d2-d3},  [r1,:128]
        vld1.32         {d0-d1},  [r0,:128]
        couple_fold     q1,  q0
        couple_mix      q1,  q0
        vst1.32         {d2-d3},  [r0,:128]!
        vst1.32         {d0-d1},  [r1,:128]!
        bx              lr
endfunc
|