yading@10
|
1 /*
|
yading@10
|
2 * Copyright (c) 2012
|
yading@10
|
3 * MIPS Technologies, Inc., California.
|
yading@10
|
4 *
|
yading@10
|
5 * Redistribution and use in source and binary forms, with or without
|
yading@10
|
6 * modification, are permitted provided that the following conditions
|
yading@10
|
7 * are met:
|
yading@10
|
8 * 1. Redistributions of source code must retain the above copyright
|
yading@10
|
9 * notice, this list of conditions and the following disclaimer.
|
yading@10
|
10 * 2. Redistributions in binary form must reproduce the above copyright
|
yading@10
|
11 * notice, this list of conditions and the following disclaimer in the
|
yading@10
|
12 * documentation and/or other materials provided with the distribution.
|
yading@10
|
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
yading@10
|
14 * contributors may be used to endorse or promote products derived from
|
yading@10
|
15 * this software without specific prior written permission.
|
yading@10
|
16 *
|
yading@10
|
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
|
yading@10
|
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
yading@10
|
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
yading@10
|
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
|
yading@10
|
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
yading@10
|
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
yading@10
|
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
yading@10
|
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
yading@10
|
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
yading@10
|
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
yading@10
|
27 * SUCH DAMAGE.
|
yading@10
|
28 *
|
yading@10
|
29 * Authors: Darko Laus (darko@mips.com)
|
yading@10
|
30 * Djordje Pesut (djordje@mips.com)
|
yading@10
|
31 * Mirjana Vulin (mvulin@mips.com)
|
yading@10
|
32 *
|
yading@10
|
33 * This file is part of FFmpeg.
|
yading@10
|
34 *
|
yading@10
|
35 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
36 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
37 * License as published by the Free Software Foundation; either
|
yading@10
|
38 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
39 *
|
yading@10
|
40 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
43 * Lesser General Public License for more details.
|
yading@10
|
44 *
|
yading@10
|
45 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
46 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
47 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
48 */
|
yading@10
|
49
|
yading@10
|
50 /**
|
yading@10
|
51 * @file
|
yading@10
|
52 * Reference: libavcodec/aacdec.c
|
yading@10
|
53 */
|
yading@10
|
54
|
yading@10
|
55 #include "libavcodec/aac.h"
|
yading@10
|
56 #include "aacdec_mips.h"
|
yading@10
|
57 #include "libavcodec/aactab.h"
|
yading@10
|
58 #include "libavcodec/sinewin.h"
|
yading@10
|
59
|
yading@10
|
60 #if HAVE_INLINE_ASM
|
yading@10
|
61 static av_always_inline int lcg_random(unsigned previous_val)
|
yading@10
|
62 {
|
yading@10
|
63 union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
|
yading@10
|
64 return v.s;
|
yading@10
|
65 }
|
yading@10
|
66
|
yading@10
|
67 static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
|
yading@10
|
68 {
|
yading@10
|
69 IndividualChannelStream *ics = &sce->ics;
|
yading@10
|
70 float *in = sce->coeffs;
|
yading@10
|
71 float *out = sce->ret;
|
yading@10
|
72 float *saved = sce->saved;
|
yading@10
|
73 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
|
yading@10
|
74 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
|
yading@10
|
75 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
|
yading@10
|
76 float *buf = ac->buf_mdct;
|
yading@10
|
77 int i;
|
yading@10
|
78
|
yading@10
|
79 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
yading@10
|
80 for (i = 0; i < 1024; i += 128)
|
yading@10
|
81 ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
|
yading@10
|
82 } else
|
yading@10
|
83 ac->mdct.imdct_half(&ac->mdct, buf, in);
|
yading@10
|
84
|
yading@10
|
85 /* window overlapping
|
yading@10
|
86 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
|
yading@10
|
87 * and long to short transitions are considered to be short to short
|
yading@10
|
88 * transitions. This leaves just two cases (long to long and short to short)
|
yading@10
|
89 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
|
yading@10
|
90 */
|
yading@10
|
91 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
|
yading@10
|
92 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
|
yading@10
|
93 ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
|
yading@10
|
94 } else {
|
yading@10
|
95 {
|
yading@10
|
96 float *buf1 = saved;
|
yading@10
|
97 float *buf2 = out;
|
yading@10
|
98 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
99 int loop_end;
|
yading@10
|
100
|
yading@10
|
101 /* loop unrolled 8 times */
|
yading@10
|
102 __asm__ volatile (
|
yading@10
|
103 ".set push \n\t"
|
yading@10
|
104 ".set noreorder \n\t"
|
yading@10
|
105 "addiu %[loop_end], %[src], 1792 \n\t"
|
yading@10
|
106 "1: \n\t"
|
yading@10
|
107 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
108 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
109 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
110 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
111 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
112 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
113 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
114 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
115 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
116 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
117 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
118 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
119 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
120 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
121 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
122 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
123 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
124 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
125 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
126 ".set pop \n\t"
|
yading@10
|
127
|
yading@10
|
128 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
129 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
130 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
131 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
132 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
|
yading@10
|
133 [dst]"+r"(buf2)
|
yading@10
|
134 :
|
yading@10
|
135 : "memory"
|
yading@10
|
136 );
|
yading@10
|
137 }
|
yading@10
|
138
|
yading@10
|
139 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
yading@10
|
140 {
|
yading@10
|
141 float wi;
|
yading@10
|
142 float wj;
|
yading@10
|
143 int i;
|
yading@10
|
144 float temp0, temp1, temp2, temp3;
|
yading@10
|
145 float *dst0 = out + 448 + 0*128;
|
yading@10
|
146 float *dst1 = dst0 + 64 + 63;
|
yading@10
|
147 float *dst2 = saved + 63;
|
yading@10
|
148 float *win0 = (float*)swindow;
|
yading@10
|
149 float *win1 = win0 + 64 + 63;
|
yading@10
|
150 float *win0_prev = (float*)swindow_prev;
|
yading@10
|
151 float *win1_prev = win0_prev + 64 + 63;
|
yading@10
|
152 float *src0_prev = saved + 448;
|
yading@10
|
153 float *src1_prev = buf + 0*128 + 63;
|
yading@10
|
154 float *src0 = buf + 0*128 + 64;
|
yading@10
|
155 float *src1 = buf + 1*128 + 63;
|
yading@10
|
156
|
yading@10
|
157 for(i = 0; i < 64; i++)
|
yading@10
|
158 {
|
yading@10
|
159 temp0 = src0_prev[0];
|
yading@10
|
160 temp1 = src1_prev[0];
|
yading@10
|
161 wi = *win0_prev;
|
yading@10
|
162 wj = *win1_prev;
|
yading@10
|
163 temp2 = src0[0];
|
yading@10
|
164 temp3 = src1[0];
|
yading@10
|
165 dst0[0] = temp0 * wj - temp1 * wi;
|
yading@10
|
166 dst1[0] = temp0 * wi + temp1 * wj;
|
yading@10
|
167
|
yading@10
|
168 wi = *win0;
|
yading@10
|
169 wj = *win1;
|
yading@10
|
170
|
yading@10
|
171 temp0 = src0[128];
|
yading@10
|
172 temp1 = src1[128];
|
yading@10
|
173 dst0[128] = temp2 * wj - temp3 * wi;
|
yading@10
|
174 dst1[128] = temp2 * wi + temp3 * wj;
|
yading@10
|
175
|
yading@10
|
176 temp2 = src0[256];
|
yading@10
|
177 temp3 = src1[256];
|
yading@10
|
178 dst0[256] = temp0 * wj - temp1 * wi;
|
yading@10
|
179 dst1[256] = temp0 * wi + temp1 * wj;
|
yading@10
|
180 dst0[384] = temp2 * wj - temp3 * wi;
|
yading@10
|
181 dst1[384] = temp2 * wi + temp3 * wj;
|
yading@10
|
182
|
yading@10
|
183 temp0 = src0[384];
|
yading@10
|
184 temp1 = src1[384];
|
yading@10
|
185 dst0[512] = temp0 * wj - temp1 * wi;
|
yading@10
|
186 dst2[0] = temp0 * wi + temp1 * wj;
|
yading@10
|
187
|
yading@10
|
188 src0++;
|
yading@10
|
189 src1--;
|
yading@10
|
190 src0_prev++;
|
yading@10
|
191 src1_prev--;
|
yading@10
|
192 win0++;
|
yading@10
|
193 win1--;
|
yading@10
|
194 win0_prev++;
|
yading@10
|
195 win1_prev--;
|
yading@10
|
196 dst0++;
|
yading@10
|
197 dst1--;
|
yading@10
|
198 dst2--;
|
yading@10
|
199 }
|
yading@10
|
200 }
|
yading@10
|
201 } else {
|
yading@10
|
202 ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
|
yading@10
|
203 {
|
yading@10
|
204 float *buf1 = buf + 64;
|
yading@10
|
205 float *buf2 = out + 576;
|
yading@10
|
206 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
207 int loop_end;
|
yading@10
|
208
|
yading@10
|
209 /* loop unrolled 8 times */
|
yading@10
|
210 __asm__ volatile (
|
yading@10
|
211 ".set push \n\t"
|
yading@10
|
212 ".set noreorder \n\t"
|
yading@10
|
213 "addiu %[loop_end], %[src], 1792 \n\t"
|
yading@10
|
214 "1: \n\t"
|
yading@10
|
215 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
216 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
217 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
218 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
219 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
220 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
221 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
222 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
223 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
224 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
225 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
226 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
227 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
228 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
229 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
230 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
231 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
232 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
233 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
234 ".set pop \n\t"
|
yading@10
|
235
|
yading@10
|
236 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
237 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
238 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
239 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
240 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
|
yading@10
|
241 [dst]"+r"(buf2)
|
yading@10
|
242 :
|
yading@10
|
243 : "memory"
|
yading@10
|
244 );
|
yading@10
|
245 }
|
yading@10
|
246 }
|
yading@10
|
247 }
|
yading@10
|
248
|
yading@10
|
249 // buffer update
|
yading@10
|
250 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
yading@10
|
251 ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
|
yading@10
|
252 ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
|
yading@10
|
253 ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
|
yading@10
|
254 {
|
yading@10
|
255 float *buf1 = buf + 7*128 + 64;
|
yading@10
|
256 float *buf2 = saved + 448;
|
yading@10
|
257 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
258 int loop_end;
|
yading@10
|
259
|
yading@10
|
260 /* loop unrolled 8 times */
|
yading@10
|
261 __asm__ volatile (
|
yading@10
|
262 ".set push \n\t"
|
yading@10
|
263 ".set noreorder \n\t"
|
yading@10
|
264 "addiu %[loop_end], %[src], 256 \n\t"
|
yading@10
|
265 "1: \n\t"
|
yading@10
|
266 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
267 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
268 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
269 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
270 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
271 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
272 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
273 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
274 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
275 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
276 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
277 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
278 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
279 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
280 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
281 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
282 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
283 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
284 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
285 ".set pop \n\t"
|
yading@10
|
286
|
yading@10
|
287 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
288 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
289 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
290 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
291 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
|
yading@10
|
292 [dst]"+r"(buf2)
|
yading@10
|
293 :
|
yading@10
|
294 : "memory"
|
yading@10
|
295 );
|
yading@10
|
296 }
|
yading@10
|
297 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
|
yading@10
|
298 float *buf1 = buf + 512;
|
yading@10
|
299 float *buf2 = saved;
|
yading@10
|
300 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
301 int loop_end;
|
yading@10
|
302
|
yading@10
|
303 /* loop unrolled 8 times */
|
yading@10
|
304 __asm__ volatile (
|
yading@10
|
305 ".set push \n\t"
|
yading@10
|
306 ".set noreorder \n\t"
|
yading@10
|
307 "addiu %[loop_end], %[src], 1792 \n\t"
|
yading@10
|
308 "1: \n\t"
|
yading@10
|
309 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
310 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
311 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
312 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
313 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
314 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
315 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
316 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
317 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
318 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
319 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
320 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
321 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
322 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
323 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
324 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
325 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
326 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
327 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
328 ".set pop \n\t"
|
yading@10
|
329
|
yading@10
|
330 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
331 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
332 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
333 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
334 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
|
yading@10
|
335 [dst]"+r"(buf2)
|
yading@10
|
336 :
|
yading@10
|
337 : "memory"
|
yading@10
|
338 );
|
yading@10
|
339 {
|
yading@10
|
340 float *buf1 = buf + 7*128 + 64;
|
yading@10
|
341 float *buf2 = saved + 448;
|
yading@10
|
342 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
343 int loop_end;
|
yading@10
|
344
|
yading@10
|
345 /* loop unrolled 8 times */
|
yading@10
|
346 __asm__ volatile (
|
yading@10
|
347 ".set push \n\t"
|
yading@10
|
348 ".set noreorder \n\t"
|
yading@10
|
349 "addiu %[loop_end], %[src], 256 \n\t"
|
yading@10
|
350 "1: \n\t"
|
yading@10
|
351 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
352 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
353 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
354 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
355 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
356 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
357 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
358 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
359 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
360 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
361 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
362 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
363 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
364 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
365 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
366 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
367 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
368 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
369 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
370 ".set pop \n\t"
|
yading@10
|
371
|
yading@10
|
372 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
373 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
374 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
375 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
376 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
|
yading@10
|
377 [dst]"+r"(buf2)
|
yading@10
|
378 :
|
yading@10
|
379 : "memory"
|
yading@10
|
380 );
|
yading@10
|
381 }
|
yading@10
|
382 } else { // LONG_STOP or ONLY_LONG
|
yading@10
|
383 float *buf1 = buf + 512;
|
yading@10
|
384 float *buf2 = saved;
|
yading@10
|
385 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
386 int loop_end;
|
yading@10
|
387
|
yading@10
|
388 /* loop unrolled 8 times */
|
yading@10
|
389 __asm__ volatile (
|
yading@10
|
390 ".set push \n\t"
|
yading@10
|
391 ".set noreorder \n\t"
|
yading@10
|
392 "addiu %[loop_end], %[src], 2048 \n\t"
|
yading@10
|
393 "1: \n\t"
|
yading@10
|
394 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
395 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
396 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
397 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
398 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
399 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
400 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
401 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
402 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
403 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
404 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
405 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
406 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
407 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
408 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
409 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
410 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
411 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
412 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
413 ".set pop \n\t"
|
yading@10
|
414
|
yading@10
|
415 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
416 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
417 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
418 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
419 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
|
yading@10
|
420 [dst]"+r"(buf2)
|
yading@10
|
421 :
|
yading@10
|
422 : "memory"
|
yading@10
|
423 );
|
yading@10
|
424 }
|
yading@10
|
425 }
|
yading@10
|
426
|
yading@10
|
427 static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
|
yading@10
|
428 {
|
yading@10
|
429 const LongTermPrediction *ltp = &sce->ics.ltp;
|
yading@10
|
430 const uint16_t *offsets = sce->ics.swb_offset;
|
yading@10
|
431 int i, sfb;
|
yading@10
|
432 int j, k;
|
yading@10
|
433
|
yading@10
|
434 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
|
yading@10
|
435 float *predTime = sce->ret;
|
yading@10
|
436 float *predFreq = ac->buf_mdct;
|
yading@10
|
437 float *p_predTime;
|
yading@10
|
438 int16_t num_samples = 2048;
|
yading@10
|
439
|
yading@10
|
440 if (ltp->lag < 1024)
|
yading@10
|
441 num_samples = ltp->lag + 1024;
|
yading@10
|
442 j = (2048 - num_samples) >> 2;
|
yading@10
|
443 k = (2048 - num_samples) & 3;
|
yading@10
|
444 p_predTime = &predTime[num_samples];
|
yading@10
|
445
|
yading@10
|
446 for (i = 0; i < num_samples; i++)
|
yading@10
|
447 predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
|
yading@10
|
448 for (i = 0; i < j; i++) {
|
yading@10
|
449
|
yading@10
|
450 /* loop unrolled 4 times */
|
yading@10
|
451 __asm__ volatile (
|
yading@10
|
452 "sw $0, 0(%[p_predTime]) \n\t"
|
yading@10
|
453 "sw $0, 4(%[p_predTime]) \n\t"
|
yading@10
|
454 "sw $0, 8(%[p_predTime]) \n\t"
|
yading@10
|
455 "sw $0, 12(%[p_predTime]) \n\t"
|
yading@10
|
456 "addiu %[p_predTime], %[p_predTime], 16 \n\t"
|
yading@10
|
457
|
yading@10
|
458 : [p_predTime]"+r"(p_predTime)
|
yading@10
|
459 :
|
yading@10
|
460 : "memory"
|
yading@10
|
461 );
|
yading@10
|
462 }
|
yading@10
|
463 for (i = 0; i < k; i++) {
|
yading@10
|
464
|
yading@10
|
465 __asm__ volatile (
|
yading@10
|
466 "sw $0, 0(%[p_predTime]) \n\t"
|
yading@10
|
467 "addiu %[p_predTime], %[p_predTime], 4 \n\t"
|
yading@10
|
468
|
yading@10
|
469 : [p_predTime]"+r"(p_predTime)
|
yading@10
|
470 :
|
yading@10
|
471 : "memory"
|
yading@10
|
472 );
|
yading@10
|
473 }
|
yading@10
|
474
|
yading@10
|
475 ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
|
yading@10
|
476
|
yading@10
|
477 if (sce->tns.present)
|
yading@10
|
478 ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
|
yading@10
|
479
|
yading@10
|
480 for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
|
yading@10
|
481 if (ltp->used[sfb])
|
yading@10
|
482 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
|
yading@10
|
483 sce->coeffs[i] += predFreq[i];
|
yading@10
|
484 }
|
yading@10
|
485 }
|
yading@10
|
486
|
yading@10
|
487 #if HAVE_MIPSFPU
|
yading@10
|
488 static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
|
yading@10
|
489 {
|
yading@10
|
490 IndividualChannelStream *ics = &sce->ics;
|
yading@10
|
491 float *saved = sce->saved;
|
yading@10
|
492 float *saved_ltp = sce->coeffs;
|
yading@10
|
493 const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
|
yading@10
|
494 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
|
yading@10
|
495 int i;
|
yading@10
|
496 int loop_end, loop_end1, loop_end2;
|
yading@10
|
497 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11;
|
yading@10
|
498
|
yading@10
|
499 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
yading@10
|
500 float *buf = saved;
|
yading@10
|
501 float *buf0 = saved_ltp;
|
yading@10
|
502 float *p_saved_ltp = saved_ltp + 576;
|
yading@10
|
503 float *ptr1 = &saved_ltp[512];
|
yading@10
|
504 float *ptr2 = &ac->buf_mdct[1023];
|
yading@10
|
505 float *ptr3 = (float*)&swindow[63];
|
yading@10
|
506 loop_end1 = (int)(p_saved_ltp + 448);
|
yading@10
|
507
|
yading@10
|
508 /* loop unrolled 8 times */
|
yading@10
|
509 __asm__ volatile (
|
yading@10
|
510 ".set push \n\t"
|
yading@10
|
511 ".set noreorder \n\t"
|
yading@10
|
512 "addiu %[loop_end], %[src], 2048 \n\t"
|
yading@10
|
513 "1: \n\t"
|
yading@10
|
514 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
515 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
516 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
517 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
518 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
519 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
520 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
521 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
522 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
523 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
524 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
525 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
526 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
527 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
528 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
529 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
530 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
531 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
532 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
533 ".set pop \n\t"
|
yading@10
|
534
|
yading@10
|
535 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
536 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
537 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
538 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
539 [loop_end]"=&r"(loop_end), [src]"+r"(buf),
|
yading@10
|
540 [dst]"+r"(buf0)
|
yading@10
|
541 :
|
yading@10
|
542 : "memory"
|
yading@10
|
543 );
|
yading@10
|
544
|
yading@10
|
545 /* loop unrolled 8 times */
|
yading@10
|
546 __asm__ volatile (
|
yading@10
|
547 "1: \n\t"
|
yading@10
|
548 "sw $0, 0(%[p_saved_ltp]) \n\t"
|
yading@10
|
549 "sw $0, 4(%[p_saved_ltp]) \n\t"
|
yading@10
|
550 "sw $0, 8(%[p_saved_ltp]) \n\t"
|
yading@10
|
551 "sw $0, 12(%[p_saved_ltp]) \n\t"
|
yading@10
|
552 "sw $0, 16(%[p_saved_ltp]) \n\t"
|
yading@10
|
553 "sw $0, 20(%[p_saved_ltp]) \n\t"
|
yading@10
|
554 "sw $0, 24(%[p_saved_ltp]) \n\t"
|
yading@10
|
555 "sw $0, 28(%[p_saved_ltp]) \n\t"
|
yading@10
|
556 "addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t"
|
yading@10
|
557 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
|
yading@10
|
558
|
yading@10
|
559 : [p_saved_ltp]"+r"(p_saved_ltp)
|
yading@10
|
560 : [loop_end1]"r"(loop_end1)
|
yading@10
|
561 : "memory"
|
yading@10
|
562 );
|
yading@10
|
563
|
yading@10
|
564 ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
|
yading@10
|
565 for (i = 0; i < 16; i++){
|
yading@10
|
566 /* loop unrolled 4 times */
|
yading@10
|
567 __asm__ volatile (
|
yading@10
|
568 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
|
yading@10
|
569 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
|
yading@10
|
570 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
|
yading@10
|
571 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
|
yading@10
|
572 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
|
yading@10
|
573 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
|
yading@10
|
574 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
|
yading@10
|
575 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
|
yading@10
|
576 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
|
yading@10
|
577 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
|
yading@10
|
578 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
|
yading@10
|
579 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
|
yading@10
|
580 "swc1 %[temp8], 0(%[ptr1]) \n\t"
|
yading@10
|
581 "swc1 %[temp9], 4(%[ptr1]) \n\t"
|
yading@10
|
582 "swc1 %[temp10], 8(%[ptr1]) \n\t"
|
yading@10
|
583 "swc1 %[temp11], 12(%[ptr1]) \n\t"
|
yading@10
|
584 "addiu %[ptr1], %[ptr1], 16 \n\t"
|
yading@10
|
585 "addiu %[ptr2], %[ptr2], -16 \n\t"
|
yading@10
|
586 "addiu %[ptr3], %[ptr3], -16 \n\t"
|
yading@10
|
587
|
yading@10
|
588 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
|
yading@10
|
589 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
|
yading@10
|
590 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
|
yading@10
|
591 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
|
yading@10
|
592 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
|
yading@10
|
593 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
|
yading@10
|
594 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
|
yading@10
|
595 :
|
yading@10
|
596 : "memory"
|
yading@10
|
597 );
|
yading@10
|
598 }
|
yading@10
|
599 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
|
yading@10
|
600 float *buff0 = saved;
|
yading@10
|
601 float *buff1 = saved_ltp;
|
yading@10
|
602 float *ptr1 = &saved_ltp[512];
|
yading@10
|
603 float *ptr2 = &ac->buf_mdct[1023];
|
yading@10
|
604 float *ptr3 = (float*)&swindow[63];
|
yading@10
|
605 loop_end = (int)(saved + 448);
|
yading@10
|
606
|
yading@10
|
607 /* loop unrolled 8 times */
|
yading@10
|
608 __asm__ volatile (
|
yading@10
|
609 ".set push \n\t"
|
yading@10
|
610 ".set noreorder \n\t"
|
yading@10
|
611 "1: \n\t"
|
yading@10
|
612 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
613 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
614 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
615 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
616 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
617 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
618 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
619 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
620 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
621 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
622 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
623 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
624 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
625 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
626 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
627 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
628 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
629 "sw $0, 2304(%[dst]) \n\t"
|
yading@10
|
630 "sw $0, 2308(%[dst]) \n\t"
|
yading@10
|
631 "sw $0, 2312(%[dst]) \n\t"
|
yading@10
|
632 "sw $0, 2316(%[dst]) \n\t"
|
yading@10
|
633 "sw $0, 2320(%[dst]) \n\t"
|
yading@10
|
634 "sw $0, 2324(%[dst]) \n\t"
|
yading@10
|
635 "sw $0, 2328(%[dst]) \n\t"
|
yading@10
|
636 "sw $0, 2332(%[dst]) \n\t"
|
yading@10
|
637 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
638 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
639 ".set pop \n\t"
|
yading@10
|
640
|
yading@10
|
641 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
642 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
643 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
644 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
645 [src]"+r"(buff0), [dst]"+r"(buff1)
|
yading@10
|
646 : [loop_end]"r"(loop_end)
|
yading@10
|
647 : "memory"
|
yading@10
|
648 );
|
yading@10
|
649 ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
|
yading@10
|
650 for (i = 0; i < 16; i++){
|
yading@10
|
651 /* loop unrolled 8 times */
|
yading@10
|
652 __asm__ volatile (
|
yading@10
|
653 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
|
yading@10
|
654 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
|
yading@10
|
655 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
|
yading@10
|
656 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
|
yading@10
|
657 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
|
yading@10
|
658 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
|
yading@10
|
659 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
|
yading@10
|
660 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
|
yading@10
|
661 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
|
yading@10
|
662 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
|
yading@10
|
663 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
|
yading@10
|
664 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
|
yading@10
|
665 "swc1 %[temp8], 0(%[ptr1]) \n\t"
|
yading@10
|
666 "swc1 %[temp9], 4(%[ptr1]) \n\t"
|
yading@10
|
667 "swc1 %[temp10], 8(%[ptr1]) \n\t"
|
yading@10
|
668 "swc1 %[temp11], 12(%[ptr1]) \n\t"
|
yading@10
|
669 "addiu %[ptr1], %[ptr1], 16 \n\t"
|
yading@10
|
670 "addiu %[ptr2], %[ptr2], -16 \n\t"
|
yading@10
|
671 "addiu %[ptr3], %[ptr3], -16 \n\t"
|
yading@10
|
672
|
yading@10
|
673 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
|
yading@10
|
674 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
|
yading@10
|
675 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
|
yading@10
|
676 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
|
yading@10
|
677 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
|
yading@10
|
678 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
|
yading@10
|
679 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
|
yading@10
|
680 :
|
yading@10
|
681 : "memory"
|
yading@10
|
682 );
|
yading@10
|
683 }
|
yading@10
|
684 } else { // LONG_STOP or ONLY_LONG
|
yading@10
|
685 float *ptr1, *ptr2, *ptr3;
|
yading@10
|
686 ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
|
yading@10
|
687
|
yading@10
|
688 ptr1 = &saved_ltp[512];
|
yading@10
|
689 ptr2 = &ac->buf_mdct[1023];
|
yading@10
|
690 ptr3 = (float*)&lwindow[511];
|
yading@10
|
691
|
yading@10
|
692 for (i = 0; i < 512; i+=4){
|
yading@10
|
693 /* loop unrolled 4 times */
|
yading@10
|
694 __asm__ volatile (
|
yading@10
|
695 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
|
yading@10
|
696 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
|
yading@10
|
697 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
|
yading@10
|
698 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
|
yading@10
|
699 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
|
yading@10
|
700 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
|
yading@10
|
701 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
|
yading@10
|
702 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
|
yading@10
|
703 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
|
yading@10
|
704 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
|
yading@10
|
705 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
|
yading@10
|
706 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
|
yading@10
|
707 "swc1 %[temp8], 0(%[ptr1]) \n\t"
|
yading@10
|
708 "swc1 %[temp9], 4(%[ptr1]) \n\t"
|
yading@10
|
709 "swc1 %[temp10], 8(%[ptr1]) \n\t"
|
yading@10
|
710 "swc1 %[temp11], 12(%[ptr1]) \n\t"
|
yading@10
|
711 "addiu %[ptr1], %[ptr1], 16 \n\t"
|
yading@10
|
712 "addiu %[ptr2], %[ptr2], -16 \n\t"
|
yading@10
|
713 "addiu %[ptr3], %[ptr3], -16 \n\t"
|
yading@10
|
714
|
yading@10
|
715 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
|
yading@10
|
716 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
|
yading@10
|
717 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
|
yading@10
|
718 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
|
yading@10
|
719 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
|
yading@10
|
720 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
|
yading@10
|
721 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2),
|
yading@10
|
722 [ptr3]"+r"(ptr3)
|
yading@10
|
723 :
|
yading@10
|
724 : "memory"
|
yading@10
|
725 );
|
yading@10
|
726 }
|
yading@10
|
727 }
|
yading@10
|
728
|
yading@10
|
729 {
|
yading@10
|
730 float *buf1 = sce->ltp_state+1024;
|
yading@10
|
731 float *buf2 = sce->ltp_state;
|
yading@10
|
732 float *buf3 = sce->ret;
|
yading@10
|
733 float *buf4 = sce->ltp_state+1024;
|
yading@10
|
734 float *buf5 = saved_ltp;
|
yading@10
|
735 float *buf6 = sce->ltp_state+2048;
|
yading@10
|
736
|
yading@10
|
737 /* loops unrolled 8 times */
|
yading@10
|
738 __asm__ volatile (
|
yading@10
|
739 ".set push \n\t"
|
yading@10
|
740 ".set noreorder \n\t"
|
yading@10
|
741 "addiu %[loop_end], %[src], 4096 \n\t"
|
yading@10
|
742 "addiu %[loop_end1], %[src1], 4096 \n\t"
|
yading@10
|
743 "addiu %[loop_end2], %[src2], 4096 \n\t"
|
yading@10
|
744 "1: \n\t"
|
yading@10
|
745 "lw %[temp0], 0(%[src]) \n\t"
|
yading@10
|
746 "lw %[temp1], 4(%[src]) \n\t"
|
yading@10
|
747 "lw %[temp2], 8(%[src]) \n\t"
|
yading@10
|
748 "lw %[temp3], 12(%[src]) \n\t"
|
yading@10
|
749 "lw %[temp4], 16(%[src]) \n\t"
|
yading@10
|
750 "lw %[temp5], 20(%[src]) \n\t"
|
yading@10
|
751 "lw %[temp6], 24(%[src]) \n\t"
|
yading@10
|
752 "lw %[temp7], 28(%[src]) \n\t"
|
yading@10
|
753 "addiu %[src], %[src], 32 \n\t"
|
yading@10
|
754 "sw %[temp0], 0(%[dst]) \n\t"
|
yading@10
|
755 "sw %[temp1], 4(%[dst]) \n\t"
|
yading@10
|
756 "sw %[temp2], 8(%[dst]) \n\t"
|
yading@10
|
757 "sw %[temp3], 12(%[dst]) \n\t"
|
yading@10
|
758 "sw %[temp4], 16(%[dst]) \n\t"
|
yading@10
|
759 "sw %[temp5], 20(%[dst]) \n\t"
|
yading@10
|
760 "sw %[temp6], 24(%[dst]) \n\t"
|
yading@10
|
761 "sw %[temp7], 28(%[dst]) \n\t"
|
yading@10
|
762 "bne %[src], %[loop_end], 1b \n\t"
|
yading@10
|
763 " addiu %[dst], %[dst], 32 \n\t"
|
yading@10
|
764 "2: \n\t"
|
yading@10
|
765 "lw %[temp0], 0(%[src1]) \n\t"
|
yading@10
|
766 "lw %[temp1], 4(%[src1]) \n\t"
|
yading@10
|
767 "lw %[temp2], 8(%[src1]) \n\t"
|
yading@10
|
768 "lw %[temp3], 12(%[src1]) \n\t"
|
yading@10
|
769 "lw %[temp4], 16(%[src1]) \n\t"
|
yading@10
|
770 "lw %[temp5], 20(%[src1]) \n\t"
|
yading@10
|
771 "lw %[temp6], 24(%[src1]) \n\t"
|
yading@10
|
772 "lw %[temp7], 28(%[src1]) \n\t"
|
yading@10
|
773 "addiu %[src1], %[src1], 32 \n\t"
|
yading@10
|
774 "sw %[temp0], 0(%[dst1]) \n\t"
|
yading@10
|
775 "sw %[temp1], 4(%[dst1]) \n\t"
|
yading@10
|
776 "sw %[temp2], 8(%[dst1]) \n\t"
|
yading@10
|
777 "sw %[temp3], 12(%[dst1]) \n\t"
|
yading@10
|
778 "sw %[temp4], 16(%[dst1]) \n\t"
|
yading@10
|
779 "sw %[temp5], 20(%[dst1]) \n\t"
|
yading@10
|
780 "sw %[temp6], 24(%[dst1]) \n\t"
|
yading@10
|
781 "sw %[temp7], 28(%[dst1]) \n\t"
|
yading@10
|
782 "bne %[src1], %[loop_end1], 2b \n\t"
|
yading@10
|
783 " addiu %[dst1], %[dst1], 32 \n\t"
|
yading@10
|
784 "3: \n\t"
|
yading@10
|
785 "lw %[temp0], 0(%[src2]) \n\t"
|
yading@10
|
786 "lw %[temp1], 4(%[src2]) \n\t"
|
yading@10
|
787 "lw %[temp2], 8(%[src2]) \n\t"
|
yading@10
|
788 "lw %[temp3], 12(%[src2]) \n\t"
|
yading@10
|
789 "lw %[temp4], 16(%[src2]) \n\t"
|
yading@10
|
790 "lw %[temp5], 20(%[src2]) \n\t"
|
yading@10
|
791 "lw %[temp6], 24(%[src2]) \n\t"
|
yading@10
|
792 "lw %[temp7], 28(%[src2]) \n\t"
|
yading@10
|
793 "addiu %[src2], %[src2], 32 \n\t"
|
yading@10
|
794 "sw %[temp0], 0(%[dst2]) \n\t"
|
yading@10
|
795 "sw %[temp1], 4(%[dst2]) \n\t"
|
yading@10
|
796 "sw %[temp2], 8(%[dst2]) \n\t"
|
yading@10
|
797 "sw %[temp3], 12(%[dst2]) \n\t"
|
yading@10
|
798 "sw %[temp4], 16(%[dst2]) \n\t"
|
yading@10
|
799 "sw %[temp5], 20(%[dst2]) \n\t"
|
yading@10
|
800 "sw %[temp6], 24(%[dst2]) \n\t"
|
yading@10
|
801 "sw %[temp7], 28(%[dst2]) \n\t"
|
yading@10
|
802 "bne %[src2], %[loop_end2], 3b \n\t"
|
yading@10
|
803 " addiu %[dst2], %[dst2], 32 \n\t"
|
yading@10
|
804 ".set pop \n\t"
|
yading@10
|
805
|
yading@10
|
806 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
807 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
808 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
809 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
810 [loop_end]"=&r"(loop_end), [loop_end1]"=&r"(loop_end1),
|
yading@10
|
811 [loop_end2]"=&r"(loop_end2), [src]"+r"(buf1),
|
yading@10
|
812 [dst]"+r"(buf2), [src1]"+r"(buf3), [dst1]"+r"(buf4),
|
yading@10
|
813 [src2]"+r"(buf5), [dst2]"+r"(buf6)
|
yading@10
|
814 :
|
yading@10
|
815 : "memory"
|
yading@10
|
816 );
|
yading@10
|
817 }
|
yading@10
|
818 }
|
yading@10
|
819 #endif /* HAVE_MIPSFPU */
|
yading@10
|
820 #endif /* HAVE_INLINE_ASM */
|
yading@10
|
821
|
yading@10
|
822 void ff_aacdec_init_mips(AACContext *c)
|
yading@10
|
823 {
|
yading@10
|
824 #if HAVE_INLINE_ASM
|
yading@10
|
825 c->imdct_and_windowing = imdct_and_windowing_mips;
|
yading@10
|
826 c->apply_ltp = apply_ltp_mips;
|
yading@10
|
827 #if HAVE_MIPSFPU
|
yading@10
|
828 c->update_ltp = update_ltp_mips;
|
yading@10
|
829 #endif /* HAVE_MIPSFPU */
|
yading@10
|
830 #endif /* HAVE_INLINE_ASM */
|
yading@10
|
831 }
|