/*
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

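@ prerot: pre-rotation stage of the forward fixed-point MDCT.
@ (Comment added; inferred from the code below.) On entry it expects
@ r2 = input, r4 = revtab, r6 = mdct_size (n), r7 = interleaved
@ {cos,sin} twiddles. Samples from the two halves of the window are
@ combined with vhsub (which also applies a 1/2 scale), multiplied by
@ the Q15 twiddles (hence the vshrn #15 narrowing), and scattered to
@ \dst in the bit-reversed order given by revtab, ready for the
@ in-place FFT. \rt is a scratch register walking revtab backwards
@ from revtab + n4.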
.macro  prerot          dst, rt
        lsr             r3,  r6,  #2            @ n4
        add             \rt, r4,  r6,  lsr #1   @ revtab + n4
        add             r9,  r3,  r3,  lsl #1   @ n3
        add             r8,  r7,  r6            @ tcos + n4
        add             r3,  r2,  r6,  lsr #1   @ in + n4
        add             r9,  r2,  r9,  lsl #1   @ in + n3
        sub             r8,  r8,  #16
        sub             r10, r3,  #16
        sub             r11, r9,  #16
        mov             r12, #-16
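        @ Main loop (comment added): the input is read around the
        @ in+n4 and in+n3 points, forwards via r3/r9 and backwards via
        @ r10/r11, with the twiddles likewise read from both ends of
        @ tcos (r7 up, r8 down).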
1:
        vld2.16         {d0,d1},   [r9, :128]!
        vld2.16         {d2,d3},   [r11,:128], r12
        vld2.16         {d4,d5},   [r3, :128]!
        vld2.16         {d6,d7},   [r10,:128], r12
        vld2.16         {d16,d17}, [r7, :128]!  @ cos, sin
        vld2.16         {d18,d19}, [r8, :128], r12
        vrev64.16       q1,  q1
        vrev64.16       q3,  q3
        vrev64.16       q9,  q9
        vneg.s16        d0,  d0
        vneg.s16        d2,  d2
        vneg.s16        d16, d16
        vneg.s16        d18, d18
        vhsub.s16       d0,  d0,  d3            @ re
        vhsub.s16       d4,  d7,  d4            @ im
        vhsub.s16       d6,  d6,  d5
        vhsub.s16       d2,  d2,  d1
        vmull.s16       q10, d0,  d16
        vmlsl.s16       q10, d4,  d17
        vmull.s16       q11, d0,  d17
        vmlal.s16       q11, d4,  d16
        vmull.s16       q12, d6,  d18
        vmlsl.s16       q12, d2,  d19
        vmull.s16       q13, d6,  d19
        vmlal.s16       q13, d2,  d18
        vshrn.s32       d0,  q10, #15
        vshrn.s32       d1,  q11, #15
        vshrn.s32       d2,  q12, #15
        vshrn.s32       d3,  q13, #15
        vzip.16         d0,  d1
        vzip.16         d2,  d3
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d0[0]},   [lr,:32]
        vst1.32         {d2[0]},   [r2,:32]
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d0[1]},   [lr,:32]
        vst1.32         {d2[1]},   [r2,:32]
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d1[0]},   [lr,:32]
        vst1.32         {d3[0]},   [r2,:32]
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d1[1]},   [lr,:32]
        vst1.32         {d3[1]},   [r2,:32]
        subs            r6,  r6,  #32
        bgt             1b
.endm

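@ Prototype (comment added; inferred from the register usage and from
@ Libav's mdct_calc callback shape): r0 = context, r1 = output,
@ r2 = input, with revtab at offset 8, mdct_size at offset 16 and
@ tcos at offset 24 inside the context.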
function ff_mdct_fixed_calc_neon, export=1
        push            {r1,r4-r11,lr}

        ldr             r4,  [r0, #8]           @ revtab
        ldr             r6,  [r0, #16]          @ mdct_size; n
        ldr             r7,  [r0, #24]          @ tcos

        prerot          r1,  r5

        mov             r4,  r0
        bl              X(ff_fft_fixed_calc_neon)

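        @ Post-rotation (comment added): the output pointer saved by
        @ the push of r1 is popped into r5; the loop then rewrites the
        @ FFT result in place, walking from the midpoint outwards in
        @ both directions, r12 holding the -16 byte backwards stride.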
        pop             {r5}
        mov             r12, #-16
        ldr             r6,  [r4, #16]          @ mdct_size; n
        ldr             r7,  [r4, #24]          @ tcos
        add             r5,  r5,  r6,  lsr #1
        add             r7,  r7,  r6,  lsr #1
        sub             r1,  r5,  #16
        sub             r2,  r7,  #16
1:
        vld2.16         {d4,d5},   [r7,:128]!
        vld2.16         {d6,d7},   [r2,:128], r12
        vld2.16         {d0,d1},   [r5,:128]
        vld2.16         {d2,d3},   [r1,:128]
        vrev64.16       q3,  q3
        vrev64.16       q1,  q1
        vneg.s16        q3,  q3
        vneg.s16        q2,  q2
        vmull.s16       q11, d2,  d6
        vmlal.s16       q11, d3,  d7
        vmull.s16       q8,  d0,  d5
        vmlsl.s16       q8,  d1,  d4
        vmull.s16       q9,  d0,  d4
        vmlal.s16       q9,  d1,  d5
        vmull.s16       q10, d2,  d7
        vmlsl.s16       q10, d3,  d6
        vshrn.s32       d0,  q11, #15
        vshrn.s32       d1,  q8,  #15
        vshrn.s32       d2,  q9,  #15
        vshrn.s32       d3,  q10, #15
        vrev64.16       q0,  q0
        vst2.16         {d2,d3},   [r5,:128]!
        vst2.16         {d0,d1},   [r1,:128], r12
        subs            r6,  r6,  #32
        bgt             1b

        pop             {r4-r11,pc}
endfunc

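@ calcw variant (comment added): same pre-rotation and FFT, but run in
@ the context's tmp_buf (loaded by ldrd from offset 12, alongside
@ revtab at offset 8), and the post-rotation keeps the full 32-bit
@ products instead of narrowing them back to 16 bits before storing.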
function ff_mdct_fixed_calcw_neon, export=1
        push            {r1,r4-r11,lr}

        ldrd            r4,  r5,  [r0, #8]      @ revtab, tmp_buf
        ldr             r6,  [r0, #16]          @ mdct_size; n
        ldr             r7,  [r0, #24]          @ tcos

        prerot          r5,  r1

        mov             r4,  r0
        mov             r1,  r5
        bl              X(ff_fft_fixed_calc_neon)

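        @ Post-rotation (comment added): r7 takes the caller's output
        @ pointer back from the stack while r5 still points into
        @ tmp_buf; results are written as 32-bit words (vst2.32),
        @ forwards through r7 and backwards through r1.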
        pop             {r7}
        mov             r12, #-16
        ldr             r6,  [r4, #16]          @ mdct_size; n
        ldr             r9,  [r4, #24]          @ tcos
        add             r5,  r5,  r6,  lsr #1
        add             r7,  r7,  r6
        add             r9,  r9,  r6,  lsr #1
        sub             r3,  r5,  #16
        sub             r1,  r7,  #16
        sub             r2,  r9,  #16
1:
        vld2.16         {d4,d5},   [r9,:128]!
        vld2.16         {d6,d7},   [r2,:128], r12
        vld2.16         {d0,d1},   [r5,:128]!
        vld2.16         {d2,d3},   [r3,:128], r12
        vrev64.16       q3,  q3
        vrev64.16       q1,  q1
        vneg.s16        q3,  q3
        vneg.s16        q2,  q2
        vmull.s16       q8,  d2,  d6
        vmlal.s16       q8,  d3,  d7
        vmull.s16       q9,  d0,  d5
        vmlsl.s16       q9,  d1,  d4
        vmull.s16       q10, d0,  d4
        vmlal.s16       q10, d1,  d5
        vmull.s16       q11, d2,  d7
        vmlsl.s16       q11, d3,  d6
        vrev64.32       q8,  q8
        vrev64.32       q9,  q9
        vst2.32         {q10,q11}, [r7,:128]!
        vst2.32         {d16,d18}, [r1,:128], r12
        vst2.32         {d17,d19}, [r1,:128], r12
        subs            r6,  r6,  #32
        bgt             1b

        pop             {r4-r11,pc}
endfunc