yading@10
|
1 /*
|
yading@10
|
2 * Copyright (c) 2012
|
yading@10
|
3 * MIPS Technologies, Inc., California.
|
yading@10
|
4 *
|
yading@10
|
5 * Redistribution and use in source and binary forms, with or without
|
yading@10
|
6 * modification, are permitted provided that the following conditions
|
yading@10
|
7 * are met:
|
yading@10
|
8 * 1. Redistributions of source code must retain the above copyright
|
yading@10
|
9 * notice, this list of conditions and the following disclaimer.
|
yading@10
|
10 * 2. Redistributions in binary form must reproduce the above copyright
|
yading@10
|
11 * notice, this list of conditions and the following disclaimer in the
|
yading@10
|
12 * documentation and/or other materials provided with the distribution.
|
yading@10
|
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
yading@10
|
14 * contributors may be used to endorse or promote products derived from
|
yading@10
|
15 * this software without specific prior written permission.
|
yading@10
|
16 *
|
yading@10
|
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
|
yading@10
|
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
yading@10
|
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
yading@10
|
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
|
yading@10
|
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
yading@10
|
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
yading@10
|
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
yading@10
|
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
yading@10
|
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
yading@10
|
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
yading@10
|
27 * SUCH DAMAGE.
|
yading@10
|
28 *
|
yading@10
|
29 * Authors: Djordje Pesut (djordje@mips.com)
|
yading@10
|
30 * Mirjana Vulin (mvulin@mips.com)
|
yading@10
|
31 *
|
yading@10
|
32 * This file is part of FFmpeg.
|
yading@10
|
33 *
|
yading@10
|
34 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
35 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
36 * License as published by the Free Software Foundation; either
|
yading@10
|
37 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
38 *
|
yading@10
|
39 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
40 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
42 * Lesser General Public License for more details.
|
yading@10
|
43 *
|
yading@10
|
44 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
45 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
46 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
47 */
|
yading@10
|
48
|
yading@10
|
49 /**
|
yading@10
|
50 * @file
|
yading@10
|
51 * Reference: libavcodec/aacsbr.c
|
yading@10
|
52 */
|
yading@10
|
53
|
yading@10
|
54 #include "libavcodec/aac.h"
|
yading@10
|
55 #include "libavcodec/aacsbr.h"
|
yading@10
|
56
|
yading@10
|
57 #define ENVELOPE_ADJUSTMENT_OFFSET 2
|
yading@10
|
58
|
yading@10
|
59 #if HAVE_INLINE_ASM
|
yading@10
|
60 static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
|
yading@10
|
61 float X_low[32][40][2], const float W[2][32][32][2],
|
yading@10
|
62 int buf_idx)
|
yading@10
|
63 {
|
yading@10
|
64 int i, k;
|
yading@10
|
65 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
yading@10
|
66 float *p_x_low = &X_low[0][8][0];
|
yading@10
|
67 float *p_w = (float*)&W[buf_idx][0][0][0];
|
yading@10
|
68 float *p_x1_low = &X_low[0][0][0];
|
yading@10
|
69 float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
|
yading@10
|
70
|
yading@10
|
71 float *loop_end=p_x1_low + 2560;
|
yading@10
|
72
|
yading@10
|
73 /* loop unrolled 8 times */
|
yading@10
|
74 __asm__ volatile (
|
yading@10
|
75 "1: \n\t"
|
yading@10
|
76 "sw $0, 0(%[p_x1_low]) \n\t"
|
yading@10
|
77 "sw $0, 4(%[p_x1_low]) \n\t"
|
yading@10
|
78 "sw $0, 8(%[p_x1_low]) \n\t"
|
yading@10
|
79 "sw $0, 12(%[p_x1_low]) \n\t"
|
yading@10
|
80 "sw $0, 16(%[p_x1_low]) \n\t"
|
yading@10
|
81 "sw $0, 20(%[p_x1_low]) \n\t"
|
yading@10
|
82 "sw $0, 24(%[p_x1_low]) \n\t"
|
yading@10
|
83 "sw $0, 28(%[p_x1_low]) \n\t"
|
yading@10
|
84 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
|
yading@10
|
85 "bne %[p_x1_low], %[loop_end], 1b \n\t"
|
yading@10
|
86 "addiu %[p_x1_low], %[p_x1_low], -10240 \n\t"
|
yading@10
|
87
|
yading@10
|
88 : [p_x1_low]"+r"(p_x1_low)
|
yading@10
|
89 : [loop_end]"r"(loop_end)
|
yading@10
|
90 : "memory"
|
yading@10
|
91 );
|
yading@10
|
92
|
yading@10
|
93 for (k = 0; k < sbr->kx[1]; k++) {
|
yading@10
|
94 for (i = 0; i < 32; i+=4) {
|
yading@10
|
95 /* loop unrolled 4 times */
|
yading@10
|
96 __asm__ volatile (
|
yading@10
|
97 "lw %[temp0], 0(%[p_w]) \n\t"
|
yading@10
|
98 "lw %[temp1], 4(%[p_w]) \n\t"
|
yading@10
|
99 "lw %[temp2], 256(%[p_w]) \n\t"
|
yading@10
|
100 "lw %[temp3], 260(%[p_w]) \n\t"
|
yading@10
|
101 "lw %[temp4], 512(%[p_w]) \n\t"
|
yading@10
|
102 "lw %[temp5], 516(%[p_w]) \n\t"
|
yading@10
|
103 "lw %[temp6], 768(%[p_w]) \n\t"
|
yading@10
|
104 "lw %[temp7], 772(%[p_w]) \n\t"
|
yading@10
|
105 "sw %[temp0], 0(%[p_x_low]) \n\t"
|
yading@10
|
106 "sw %[temp1], 4(%[p_x_low]) \n\t"
|
yading@10
|
107 "sw %[temp2], 8(%[p_x_low]) \n\t"
|
yading@10
|
108 "sw %[temp3], 12(%[p_x_low]) \n\t"
|
yading@10
|
109 "sw %[temp4], 16(%[p_x_low]) \n\t"
|
yading@10
|
110 "sw %[temp5], 20(%[p_x_low]) \n\t"
|
yading@10
|
111 "sw %[temp6], 24(%[p_x_low]) \n\t"
|
yading@10
|
112 "sw %[temp7], 28(%[p_x_low]) \n\t"
|
yading@10
|
113 "addiu %[p_x_low], %[p_x_low], 32 \n\t"
|
yading@10
|
114 "addiu %[p_w], %[p_w], 1024 \n\t"
|
yading@10
|
115
|
yading@10
|
116 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
117 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
118 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
119 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
120 [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
|
yading@10
|
121 :
|
yading@10
|
122 : "memory"
|
yading@10
|
123 );
|
yading@10
|
124 }
|
yading@10
|
125 p_x_low += 16;
|
yading@10
|
126 p_w -= 2046;
|
yading@10
|
127 }
|
yading@10
|
128
|
yading@10
|
129 for (k = 0; k < sbr->kx[0]; k++) {
|
yading@10
|
130 for (i = 0; i < 2; i++) {
|
yading@10
|
131
|
yading@10
|
132 /* loop unrolled 4 times */
|
yading@10
|
133 __asm__ volatile (
|
yading@10
|
134 "lw %[temp0], 0(%[p_w1]) \n\t"
|
yading@10
|
135 "lw %[temp1], 4(%[p_w1]) \n\t"
|
yading@10
|
136 "lw %[temp2], 256(%[p_w1]) \n\t"
|
yading@10
|
137 "lw %[temp3], 260(%[p_w1]) \n\t"
|
yading@10
|
138 "lw %[temp4], 512(%[p_w1]) \n\t"
|
yading@10
|
139 "lw %[temp5], 516(%[p_w1]) \n\t"
|
yading@10
|
140 "lw %[temp6], 768(%[p_w1]) \n\t"
|
yading@10
|
141 "lw %[temp7], 772(%[p_w1]) \n\t"
|
yading@10
|
142 "sw %[temp0], 0(%[p_x1_low]) \n\t"
|
yading@10
|
143 "sw %[temp1], 4(%[p_x1_low]) \n\t"
|
yading@10
|
144 "sw %[temp2], 8(%[p_x1_low]) \n\t"
|
yading@10
|
145 "sw %[temp3], 12(%[p_x1_low]) \n\t"
|
yading@10
|
146 "sw %[temp4], 16(%[p_x1_low]) \n\t"
|
yading@10
|
147 "sw %[temp5], 20(%[p_x1_low]) \n\t"
|
yading@10
|
148 "sw %[temp6], 24(%[p_x1_low]) \n\t"
|
yading@10
|
149 "sw %[temp7], 28(%[p_x1_low]) \n\t"
|
yading@10
|
150 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
|
yading@10
|
151 "addiu %[p_w1], %[p_w1], 1024 \n\t"
|
yading@10
|
152
|
yading@10
|
153 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
154 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
yading@10
|
155 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
yading@10
|
156 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
|
yading@10
|
157 [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
|
yading@10
|
158 :
|
yading@10
|
159 : "memory"
|
yading@10
|
160 );
|
yading@10
|
161 }
|
yading@10
|
162 p_x1_low += 64;
|
yading@10
|
163 p_w1 -= 510;
|
yading@10
|
164 }
|
yading@10
|
165 return 0;
|
yading@10
|
166 }
|
yading@10
|
167
|
yading@10
|
168 static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
|
yading@10
|
169 const float Y0[38][64][2], const float Y1[38][64][2],
|
yading@10
|
170 const float X_low[32][40][2], int ch)
|
yading@10
|
171 {
|
yading@10
|
172 int k, i;
|
yading@10
|
173 const int i_f = 32;
|
yading@10
|
174 int temp0, temp1, temp2, temp3;
|
yading@10
|
175 const float *X_low1, *Y01, *Y11;
|
yading@10
|
176 float *x1=&X[0][0][0];
|
yading@10
|
177 float *j=x1+4864;
|
yading@10
|
178 const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
|
yading@10
|
179
|
yading@10
|
180 /* loop unrolled 8 times */
|
yading@10
|
181 __asm__ volatile (
|
yading@10
|
182 "1: \n\t"
|
yading@10
|
183 "sw $0, 0(%[x1]) \n\t"
|
yading@10
|
184 "sw $0, 4(%[x1]) \n\t"
|
yading@10
|
185 "sw $0, 8(%[x1]) \n\t"
|
yading@10
|
186 "sw $0, 12(%[x1]) \n\t"
|
yading@10
|
187 "sw $0, 16(%[x1]) \n\t"
|
yading@10
|
188 "sw $0, 20(%[x1]) \n\t"
|
yading@10
|
189 "sw $0, 24(%[x1]) \n\t"
|
yading@10
|
190 "sw $0, 28(%[x1]) \n\t"
|
yading@10
|
191 "addiu %[x1], %[x1], 32 \n\t"
|
yading@10
|
192 "bne %[x1], %[j], 1b \n\t"
|
yading@10
|
193 "addiu %[x1], %[x1], -19456 \n\t"
|
yading@10
|
194
|
yading@10
|
195 : [x1]"+r"(x1)
|
yading@10
|
196 : [j]"r"(j)
|
yading@10
|
197 : "memory"
|
yading@10
|
198 );
|
yading@10
|
199
|
yading@10
|
200 if (i_Temp != 0) {
|
yading@10
|
201
|
yading@10
|
202 X_low1=&X_low[0][2][0];
|
yading@10
|
203
|
yading@10
|
204 for (k = 0; k < sbr->kx[0]; k++) {
|
yading@10
|
205
|
yading@10
|
206 __asm__ volatile (
|
yading@10
|
207 "move %[i], $zero \n\t"
|
yading@10
|
208 "2: \n\t"
|
yading@10
|
209 "lw %[temp0], 0(%[X_low1]) \n\t"
|
yading@10
|
210 "lw %[temp1], 4(%[X_low1]) \n\t"
|
yading@10
|
211 "sw %[temp0], 0(%[x1]) \n\t"
|
yading@10
|
212 "sw %[temp1], 9728(%[x1]) \n\t"
|
yading@10
|
213 "addiu %[x1], %[x1], 256 \n\t"
|
yading@10
|
214 "addiu %[X_low1], %[X_low1], 8 \n\t"
|
yading@10
|
215 "addiu %[i], %[i], 1 \n\t"
|
yading@10
|
216 "bne %[i], %[i_Temp], 2b \n\t"
|
yading@10
|
217
|
yading@10
|
218 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
|
yading@10
|
219 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
|
yading@10
|
220 : [i_Temp]"r"(i_Temp)
|
yading@10
|
221 : "memory"
|
yading@10
|
222 );
|
yading@10
|
223 x1-=(i_Temp<<6)-1;
|
yading@10
|
224 X_low1-=(i_Temp<<1)-80;
|
yading@10
|
225 }
|
yading@10
|
226
|
yading@10
|
227 x1=&X[0][0][k];
|
yading@10
|
228 Y01=(float*)&Y0[32][k][0];
|
yading@10
|
229
|
yading@10
|
230 for (; k < sbr->kx[0] + sbr->m[0]; k++) {
|
yading@10
|
231 __asm__ volatile (
|
yading@10
|
232 "move %[i], $zero \n\t"
|
yading@10
|
233 "3: \n\t"
|
yading@10
|
234 "lw %[temp0], 0(%[Y01]) \n\t"
|
yading@10
|
235 "lw %[temp1], 4(%[Y01]) \n\t"
|
yading@10
|
236 "sw %[temp0], 0(%[x1]) \n\t"
|
yading@10
|
237 "sw %[temp1], 9728(%[x1]) \n\t"
|
yading@10
|
238 "addiu %[x1], %[x1], 256 \n\t"
|
yading@10
|
239 "addiu %[Y01], %[Y01], 512 \n\t"
|
yading@10
|
240 "addiu %[i], %[i], 1 \n\t"
|
yading@10
|
241 "bne %[i], %[i_Temp], 3b \n\t"
|
yading@10
|
242
|
yading@10
|
243 : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
|
yading@10
|
244 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
|
yading@10
|
245 : [i_Temp]"r"(i_Temp)
|
yading@10
|
246 : "memory"
|
yading@10
|
247 );
|
yading@10
|
248 x1 -=(i_Temp<<6)-1;
|
yading@10
|
249 Y01 -=(i_Temp<<7)-2;
|
yading@10
|
250 }
|
yading@10
|
251 }
|
yading@10
|
252
|
yading@10
|
253 x1=&X[0][i_Temp][0];
|
yading@10
|
254 X_low1=&X_low[0][i_Temp+2][0];
|
yading@10
|
255 temp3=38;
|
yading@10
|
256
|
yading@10
|
257 for (k = 0; k < sbr->kx[1]; k++) {
|
yading@10
|
258
|
yading@10
|
259 __asm__ volatile (
|
yading@10
|
260 "move %[i], %[i_Temp] \n\t"
|
yading@10
|
261 "4: \n\t"
|
yading@10
|
262 "lw %[temp0], 0(%[X_low1]) \n\t"
|
yading@10
|
263 "lw %[temp1], 4(%[X_low1]) \n\t"
|
yading@10
|
264 "sw %[temp0], 0(%[x1]) \n\t"
|
yading@10
|
265 "sw %[temp1], 9728(%[x1]) \n\t"
|
yading@10
|
266 "addiu %[x1], %[x1], 256 \n\t"
|
yading@10
|
267 "addiu %[X_low1], %[X_low1], 8 \n\t"
|
yading@10
|
268 "addiu %[i], %[i], 1 \n\t"
|
yading@10
|
269 "bne %[i], %[temp3], 4b \n\t"
|
yading@10
|
270
|
yading@10
|
271 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
|
yading@10
|
272 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
yading@10
|
273 [temp2]"=&r"(temp2)
|
yading@10
|
274 : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
|
yading@10
|
275 : "memory"
|
yading@10
|
276 );
|
yading@10
|
277 x1 -= ((38-i_Temp)<<6)-1;
|
yading@10
|
278 X_low1 -= ((38-i_Temp)<<1)- 80;
|
yading@10
|
279 }
|
yading@10
|
280
|
yading@10
|
281 x1=&X[0][i_Temp][k];
|
yading@10
|
282 Y11=&Y1[i_Temp][k][0];
|
yading@10
|
283 temp2=32;
|
yading@10
|
284
|
yading@10
|
285 for (; k < sbr->kx[1] + sbr->m[1]; k++) {
|
yading@10
|
286
|
yading@10
|
287 __asm__ volatile (
|
yading@10
|
288 "move %[i], %[i_Temp] \n\t"
|
yading@10
|
289 "5: \n\t"
|
yading@10
|
290 "lw %[temp0], 0(%[Y11]) \n\t"
|
yading@10
|
291 "lw %[temp1], 4(%[Y11]) \n\t"
|
yading@10
|
292 "sw %[temp0], 0(%[x1]) \n\t"
|
yading@10
|
293 "sw %[temp1], 9728(%[x1]) \n\t"
|
yading@10
|
294 "addiu %[x1], %[x1], 256 \n\t"
|
yading@10
|
295 "addiu %[Y11], %[Y11], 512 \n\t"
|
yading@10
|
296 "addiu %[i], %[i], 1 \n\t"
|
yading@10
|
297 "bne %[i], %[temp2], 5b \n\t"
|
yading@10
|
298
|
yading@10
|
299 : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
|
yading@10
|
300 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
|
yading@10
|
301 : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
|
yading@10
|
302 [temp2]"r"(temp2)
|
yading@10
|
303 : "memory"
|
yading@10
|
304 );
|
yading@10
|
305
|
yading@10
|
306 x1 -= ((32-i_Temp)<<6)-1;
|
yading@10
|
307 Y11 -= ((32-i_Temp)<<7)-2;
|
yading@10
|
308 }
|
yading@10
|
309 return 0;
|
yading@10
|
310 }
|
yading@10
|
311
|
yading@10
|
312 #if HAVE_MIPSFPU
|
yading@10
|
313 static void sbr_hf_assemble_mips(float Y1[38][64][2],
|
yading@10
|
314 const float X_high[64][40][2],
|
yading@10
|
315 SpectralBandReplication *sbr, SBRData *ch_data,
|
yading@10
|
316 const int e_a[2])
|
yading@10
|
317 {
|
yading@10
|
318 int e, i, j, m;
|
yading@10
|
319 const int h_SL = 4 * !sbr->bs_smoothing_mode;
|
yading@10
|
320 const int kx = sbr->kx[1];
|
yading@10
|
321 const int m_max = sbr->m[1];
|
yading@10
|
322 static const float h_smooth[5] = {
|
yading@10
|
323 0.33333333333333,
|
yading@10
|
324 0.30150283239582,
|
yading@10
|
325 0.21816949906249,
|
yading@10
|
326 0.11516383427084,
|
yading@10
|
327 0.03183050093751,
|
yading@10
|
328 };
|
yading@10
|
329
|
yading@10
|
330 float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
|
yading@10
|
331 int indexnoise = ch_data->f_indexnoise;
|
yading@10
|
332 int indexsine = ch_data->f_indexsine;
|
yading@10
|
333 float *g_temp1, *q_temp1, *pok, *pok1;
|
yading@10
|
334 float temp1, temp2, temp3, temp4;
|
yading@10
|
335 int size = m_max;
|
yading@10
|
336
|
yading@10
|
337 if (sbr->reset) {
|
yading@10
|
338 for (i = 0; i < h_SL; i++) {
|
yading@10
|
339 memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
|
yading@10
|
340 memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
|
yading@10
|
341 }
|
yading@10
|
342 } else if (h_SL) {
|
yading@10
|
343 memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
|
yading@10
|
344 memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
|
yading@10
|
345 }
|
yading@10
|
346
|
yading@10
|
347 for (e = 0; e < ch_data->bs_num_env; e++) {
|
yading@10
|
348 for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
|
yading@10
|
349 g_temp1 = g_temp[h_SL + i];
|
yading@10
|
350 pok = sbr->gain[e];
|
yading@10
|
351 q_temp1 = q_temp[h_SL + i];
|
yading@10
|
352 pok1 = sbr->q_m[e];
|
yading@10
|
353
|
yading@10
|
354 /* loop unrolled 4 times */
|
yading@10
|
355 for (j=0; j<(size>>2); j++) {
|
yading@10
|
356 __asm__ volatile (
|
yading@10
|
357 "lw %[temp1], 0(%[pok]) \n\t"
|
yading@10
|
358 "lw %[temp2], 4(%[pok]) \n\t"
|
yading@10
|
359 "lw %[temp3], 8(%[pok]) \n\t"
|
yading@10
|
360 "lw %[temp4], 12(%[pok]) \n\t"
|
yading@10
|
361 "sw %[temp1], 0(%[g_temp1]) \n\t"
|
yading@10
|
362 "sw %[temp2], 4(%[g_temp1]) \n\t"
|
yading@10
|
363 "sw %[temp3], 8(%[g_temp1]) \n\t"
|
yading@10
|
364 "sw %[temp4], 12(%[g_temp1]) \n\t"
|
yading@10
|
365 "lw %[temp1], 0(%[pok1]) \n\t"
|
yading@10
|
366 "lw %[temp2], 4(%[pok1]) \n\t"
|
yading@10
|
367 "lw %[temp3], 8(%[pok1]) \n\t"
|
yading@10
|
368 "lw %[temp4], 12(%[pok1]) \n\t"
|
yading@10
|
369 "sw %[temp1], 0(%[q_temp1]) \n\t"
|
yading@10
|
370 "sw %[temp2], 4(%[q_temp1]) \n\t"
|
yading@10
|
371 "sw %[temp3], 8(%[q_temp1]) \n\t"
|
yading@10
|
372 "sw %[temp4], 12(%[q_temp1]) \n\t"
|
yading@10
|
373 "addiu %[pok], %[pok], 16 \n\t"
|
yading@10
|
374 "addiu %[g_temp1], %[g_temp1], 16 \n\t"
|
yading@10
|
375 "addiu %[pok1], %[pok1], 16 \n\t"
|
yading@10
|
376 "addiu %[q_temp1], %[q_temp1], 16 \n\t"
|
yading@10
|
377
|
yading@10
|
378 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
yading@10
|
379 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
|
yading@10
|
380 [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
|
yading@10
|
381 [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
|
yading@10
|
382 :
|
yading@10
|
383 : "memory"
|
yading@10
|
384 );
|
yading@10
|
385 }
|
yading@10
|
386
|
yading@10
|
387 for (j=0; j<(size&3); j++) {
|
yading@10
|
388 __asm__ volatile (
|
yading@10
|
389 "lw %[temp1], 0(%[pok]) \n\t"
|
yading@10
|
390 "lw %[temp2], 0(%[pok1]) \n\t"
|
yading@10
|
391 "sw %[temp1], 0(%[g_temp1]) \n\t"
|
yading@10
|
392 "sw %[temp2], 0(%[q_temp1]) \n\t"
|
yading@10
|
393 "addiu %[pok], %[pok], 4 \n\t"
|
yading@10
|
394 "addiu %[g_temp1], %[g_temp1], 4 \n\t"
|
yading@10
|
395 "addiu %[pok1], %[pok1], 4 \n\t"
|
yading@10
|
396 "addiu %[q_temp1], %[q_temp1], 4 \n\t"
|
yading@10
|
397
|
yading@10
|
398 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
yading@10
|
399 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
|
yading@10
|
400 [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
|
yading@10
|
401 [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
|
yading@10
|
402 :
|
yading@10
|
403 : "memory"
|
yading@10
|
404 );
|
yading@10
|
405 }
|
yading@10
|
406 }
|
yading@10
|
407 }
|
yading@10
|
408
|
yading@10
|
409 for (e = 0; e < ch_data->bs_num_env; e++) {
|
yading@10
|
410 for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
|
yading@10
|
411 LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
|
yading@10
|
412 LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
|
yading@10
|
413 float *g_filt, *q_filt;
|
yading@10
|
414
|
yading@10
|
415 if (h_SL && e != e_a[0] && e != e_a[1]) {
|
yading@10
|
416 g_filt = g_filt_tab;
|
yading@10
|
417 q_filt = q_filt_tab;
|
yading@10
|
418
|
yading@10
|
419 for (m = 0; m < m_max; m++) {
|
yading@10
|
420 const int idx1 = i + h_SL;
|
yading@10
|
421 g_filt[m] = 0.0f;
|
yading@10
|
422 q_filt[m] = 0.0f;
|
yading@10
|
423
|
yading@10
|
424 for (j = 0; j <= h_SL; j++) {
|
yading@10
|
425 g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
|
yading@10
|
426 q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
|
yading@10
|
427 }
|
yading@10
|
428 }
|
yading@10
|
429 } else {
|
yading@10
|
430 g_filt = g_temp[i + h_SL];
|
yading@10
|
431 q_filt = q_temp[i];
|
yading@10
|
432 }
|
yading@10
|
433
|
yading@10
|
434 sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
|
yading@10
|
435 i + ENVELOPE_ADJUSTMENT_OFFSET);
|
yading@10
|
436
|
yading@10
|
437 if (e != e_a[0] && e != e_a[1]) {
|
yading@10
|
438 sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
|
yading@10
|
439 q_filt, indexnoise,
|
yading@10
|
440 kx, m_max);
|
yading@10
|
441 } else {
|
yading@10
|
442 int idx = indexsine&1;
|
yading@10
|
443 int A = (1-((indexsine+(kx & 1))&2));
|
yading@10
|
444 int B = (A^(-idx)) + idx;
|
yading@10
|
445 float *out = &Y1[i][kx][idx];
|
yading@10
|
446 float *in = sbr->s_m[e];
|
yading@10
|
447 float temp0, temp1, temp2, temp3, temp4, temp5;
|
yading@10
|
448 float A_f = (float)A;
|
yading@10
|
449 float B_f = (float)B;
|
yading@10
|
450
|
yading@10
|
451 for (m = 0; m+1 < m_max; m+=2) {
|
yading@10
|
452
|
yading@10
|
453 temp2 = out[0];
|
yading@10
|
454 temp3 = out[2];
|
yading@10
|
455
|
yading@10
|
456 __asm__ volatile(
|
yading@10
|
457 "lwc1 %[temp0], 0(%[in]) \n\t"
|
yading@10
|
458 "lwc1 %[temp1], 4(%[in]) \n\t"
|
yading@10
|
459 "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
|
yading@10
|
460 "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
|
yading@10
|
461 "swc1 %[temp4], 0(%[out]) \n\t"
|
yading@10
|
462 "swc1 %[temp5], 8(%[out]) \n\t"
|
yading@10
|
463 "addiu %[in], %[in], 8 \n\t"
|
yading@10
|
464 "addiu %[out], %[out], 16 \n\t"
|
yading@10
|
465
|
yading@10
|
466 : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
|
yading@10
|
467 [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
|
yading@10
|
468 [in]"+r"(in), [out]"+r"(out)
|
yading@10
|
469 : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
|
yading@10
|
470 [temp3]"f"(temp3)
|
yading@10
|
471 : "memory"
|
yading@10
|
472 );
|
yading@10
|
473 }
|
yading@10
|
474 if(m_max&1)
|
yading@10
|
475 out[2*m ] += in[m ] * A;
|
yading@10
|
476 }
|
yading@10
|
477 indexnoise = (indexnoise + m_max) & 0x1ff;
|
yading@10
|
478 indexsine = (indexsine + 1) & 3;
|
yading@10
|
479 }
|
yading@10
|
480 }
|
yading@10
|
481 ch_data->f_indexnoise = indexnoise;
|
yading@10
|
482 ch_data->f_indexsine = indexsine;
|
yading@10
|
483 }
|
yading@10
|
484
|
yading@10
|
485 static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
|
yading@10
|
486 float (*alpha0)[2], float (*alpha1)[2],
|
yading@10
|
487 const float X_low[32][40][2], int k0)
|
yading@10
|
488 {
|
yading@10
|
489 int k;
|
yading@10
|
490 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
|
yading@10
|
491 float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
|
yading@10
|
492
|
yading@10
|
493 c = 1.000001f;
|
yading@10
|
494
|
yading@10
|
495 for (k = 0; k < k0; k++) {
|
yading@10
|
496 LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
|
yading@10
|
497 float dk;
|
yading@10
|
498 phi1 = &phi[0][0][0];
|
yading@10
|
499 alpha_1 = &alpha1[k][0];
|
yading@10
|
500 alpha_0 = &alpha0[k][0];
|
yading@10
|
501 dsp->autocorrelate(X_low[k], phi);
|
yading@10
|
502
|
yading@10
|
503 __asm__ volatile (
|
yading@10
|
504 "lwc1 %[temp0], 40(%[phi1]) \n\t"
|
yading@10
|
505 "lwc1 %[temp1], 16(%[phi1]) \n\t"
|
yading@10
|
506 "lwc1 %[temp2], 24(%[phi1]) \n\t"
|
yading@10
|
507 "lwc1 %[temp3], 28(%[phi1]) \n\t"
|
yading@10
|
508 "mul.s %[dk], %[temp0], %[temp1] \n\t"
|
yading@10
|
509 "lwc1 %[temp4], 0(%[phi1]) \n\t"
|
yading@10
|
510 "mul.s %[res2], %[temp2], %[temp2] \n\t"
|
yading@10
|
511 "lwc1 %[temp5], 4(%[phi1]) \n\t"
|
yading@10
|
512 "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
|
yading@10
|
513 "lwc1 %[temp6], 8(%[phi1]) \n\t"
|
yading@10
|
514 "div.s %[res2], %[res2], %[c] \n\t"
|
yading@10
|
515 "lwc1 %[temp0], 12(%[phi1]) \n\t"
|
yading@10
|
516 "sub.s %[dk], %[dk], %[res2] \n\t"
|
yading@10
|
517
|
yading@10
|
518 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
|
yading@10
|
519 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
|
yading@10
|
520 [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
|
yading@10
|
521 : [phi1]"r"(phi1), [c]"f"(c)
|
yading@10
|
522 : "memory"
|
yading@10
|
523 );
|
yading@10
|
524
|
yading@10
|
525 if (!dk) {
|
yading@10
|
526 alpha_1[0] = 0;
|
yading@10
|
527 alpha_1[1] = 0;
|
yading@10
|
528 } else {
|
yading@10
|
529 __asm__ volatile (
|
yading@10
|
530 "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
|
yading@10
|
531 "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
|
yading@10
|
532 "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
|
yading@10
|
533 "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
|
yading@10
|
534 "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
|
yading@10
|
535 "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
|
yading@10
|
536 "div.s %[temp_real], %[temp_real], %[dk] \n\t"
|
yading@10
|
537 "div.s %[temp_im], %[temp_im], %[dk] \n\t"
|
yading@10
|
538 "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
|
yading@10
|
539 "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
|
yading@10
|
540
|
yading@10
|
541 : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
|
yading@10
|
542 : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
|
yading@10
|
543 [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
|
yading@10
|
544 [temp5]"f"(temp5), [temp6]"f"(temp6),
|
yading@10
|
545 [alpha_1]"r"(alpha_1), [dk]"f"(dk)
|
yading@10
|
546 : "memory"
|
yading@10
|
547 );
|
yading@10
|
548 }
|
yading@10
|
549
|
yading@10
|
550 if (!phi1[4]) {
|
yading@10
|
551 alpha_0[0] = 0;
|
yading@10
|
552 alpha_0[1] = 0;
|
yading@10
|
553 } else {
|
yading@10
|
554 __asm__ volatile (
|
yading@10
|
555 "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
|
yading@10
|
556 "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
|
yading@10
|
557 "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
|
yading@10
|
558 "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
|
yading@10
|
559 "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
|
yading@10
|
560 "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
|
yading@10
|
561 "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
|
yading@10
|
562 "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
|
yading@10
|
563 "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
|
yading@10
|
564 "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
|
yading@10
|
565 "neg.s %[temp_real], %[temp_real] \n\t"
|
yading@10
|
566 "neg.s %[temp_im], %[temp_im] \n\t"
|
yading@10
|
567 "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
|
yading@10
|
568 "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
|
yading@10
|
569
|
yading@10
|
570 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
|
yading@10
|
571 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
|
yading@10
|
572 [res1]"=&f"(res1), [res2]"=&f"(res2)
|
yading@10
|
573 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
|
yading@10
|
574 [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
|
yading@10
|
575 [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
|
yading@10
|
576 : "memory"
|
yading@10
|
577 );
|
yading@10
|
578 }
|
yading@10
|
579
|
yading@10
|
580 __asm__ volatile (
|
yading@10
|
581 "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
|
yading@10
|
582 "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
|
yading@10
|
583 "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
|
yading@10
|
584 "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
|
yading@10
|
585 "mul.s %[res1], %[temp1], %[temp1] \n\t"
|
yading@10
|
586 "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
|
yading@10
|
587 "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
|
yading@10
|
588 "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
|
yading@10
|
589
|
yading@10
|
590 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
|
yading@10
|
591 [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
|
yading@10
|
592 [res1]"=&f"(res1), [res2]"=&f"(res2)
|
yading@10
|
593 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
|
yading@10
|
594 : "memory"
|
yading@10
|
595 );
|
yading@10
|
596
|
yading@10
|
597 if (res1 >= 16.0f || res2 >= 16.0f) {
|
yading@10
|
598 alpha_1[0] = 0;
|
yading@10
|
599 alpha_1[1] = 0;
|
yading@10
|
600 alpha_0[0] = 0;
|
yading@10
|
601 alpha_0[1] = 0;
|
yading@10
|
602 }
|
yading@10
|
603 }
|
yading@10
|
604 }
|
yading@10
|
605 #endif /* HAVE_MIPSFPU */
|
yading@10
|
606 #endif /* HAVE_INLINE_ASM */
|
yading@10
|
607
|
yading@10
|
608 void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
|
yading@10
|
609 {
|
yading@10
|
610 #if HAVE_INLINE_ASM
|
yading@10
|
611 c->sbr_lf_gen = sbr_lf_gen_mips;
|
yading@10
|
612 c->sbr_x_gen = sbr_x_gen_mips;
|
yading@10
|
613 #if HAVE_MIPSFPU
|
yading@10
|
614 c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
|
yading@10
|
615 c->sbr_hf_assemble = sbr_hf_assemble_mips;
|
yading@10
|
616 #endif /* HAVE_MIPSFPU */
|
yading@10
|
617 #endif /* HAVE_INLINE_ASM */
|
yading@10
|
618 }
|