aacpsdsp_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Authors: Darko Laus (darko@mips.com)
30  * Djordje Pesut (djordje@mips.com)
31  * Mirjana Vulin (mvulin@mips.com)
32  *
33  * This file is part of FFmpeg.
34  *
35  * FFmpeg is free software; you can redistribute it and/or
36  * modify it under the terms of the GNU Lesser General Public
37  * License as published by the Free Software Foundation; either
38  * version 2.1 of the License, or (at your option) any later version.
39  *
40  * FFmpeg is distributed in the hope that it will be useful,
41  * but WITHOUT ANY WARRANTY; without even the implied warranty of
42  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43  * Lesser General Public License for more details.
44  *
45  * You should have received a copy of the GNU Lesser General Public
46  * License along with FFmpeg; if not, write to the Free Software
47  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48  */
49 
50 /**
51  * @file
52  * Reference: libavcodec/aacpsdsp.c
53  */
54 
55 #include "config.h"
56 #include "libavcodec/aacpsdsp.h"
57 
58 #if HAVE_INLINE_ASM
59 static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
60  int i, int len)
61 {
62  int temp0, temp1, temp2, temp3;
63  int temp4, temp5, temp6, temp7;
64  float *out1=&out[i][0][0];
65  float *L1=&L[0][0][i];
66  float *j=out1+ len*2;
67 
68  for (; i < 64; i++) {
69 
70  /* loop unrolled 8 times */
71  __asm__ volatile (
72  "1: \n\t"
73  "lw %[temp0], 0(%[L1]) \n\t"
74  "lw %[temp1], 9728(%[L1]) \n\t"
75  "lw %[temp2], 256(%[L1]) \n\t"
76  "lw %[temp3], 9984(%[L1]) \n\t"
77  "lw %[temp4], 512(%[L1]) \n\t"
78  "lw %[temp5], 10240(%[L1]) \n\t"
79  "lw %[temp6], 768(%[L1]) \n\t"
80  "lw %[temp7], 10496(%[L1]) \n\t"
81  "sw %[temp0], 0(%[out1]) \n\t"
82  "sw %[temp1], 4(%[out1]) \n\t"
83  "sw %[temp2], 8(%[out1]) \n\t"
84  "sw %[temp3], 12(%[out1]) \n\t"
85  "sw %[temp4], 16(%[out1]) \n\t"
86  "sw %[temp5], 20(%[out1]) \n\t"
87  "sw %[temp6], 24(%[out1]) \n\t"
88  "sw %[temp7], 28(%[out1]) \n\t"
89  "addiu %[out1], %[out1], 32 \n\t"
90  "addiu %[L1], %[L1], 1024 \n\t"
91  "bne %[out1], %[j], 1b \n\t"
92 
93  : [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
94  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
95  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
96  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
97  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
98  : [len]"r"(len)
99  : "memory"
100  );
101  out1-=(len<<1)-64;
102  L1-=(len<<6)-1;
103  j+=len*2;
104  }
105 }
106 
107 static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
108  float (*in)[32][2],
109  int i, int len)
110 {
111  int n;
112  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
113  float *out1 = (float*)out + i;
114  float *out2 = (float*)out + 2432 + i;
115  float *in1 = (float*)in + 64 * i;
116  float *in2 = (float*)in + 64 * i + 1;
117 
118  for (; i < 64; i++) {
119  for (n = 0; n < 7; n++) {
120 
121  /* loop unrolled 8 times */
122  __asm__ volatile (
123  "lw %[temp0], 0(%[in1]) \n\t"
124  "lw %[temp1], 0(%[in2]) \n\t"
125  "lw %[temp2], 8(%[in1]) \n\t"
126  "lw %[temp3], 8(%[in2]) \n\t"
127  "lw %[temp4], 16(%[in1]) \n\t"
128  "lw %[temp5], 16(%[in2]) \n\t"
129  "lw %[temp6], 24(%[in1]) \n\t"
130  "lw %[temp7], 24(%[in2]) \n\t"
131  "addiu %[out1], %[out1], 1024 \n\t"
132  "addiu %[out2], %[out2], 1024 \n\t"
133  "addiu %[in1], %[in1], 32 \n\t"
134  "addiu %[in2], %[in2], 32 \n\t"
135  "sw %[temp0], -1024(%[out1]) \n\t"
136  "sw %[temp1], -1024(%[out2]) \n\t"
137  "sw %[temp2], -768(%[out1]) \n\t"
138  "sw %[temp3], -768(%[out2]) \n\t"
139  "sw %[temp4], -512(%[out1]) \n\t"
140  "sw %[temp5], -512(%[out2]) \n\t"
141  "sw %[temp6], -256(%[out1]) \n\t"
142  "sw %[temp7], -256(%[out2]) \n\t"
143 
144  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
145  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
146  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
147  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
148  [out1]"+r"(out1), [out2]"+r"(out2),
149  [in1]"+r"(in1), [in2]"+r"(in2)
150  :
151  : "memory"
152  );
153  }
154  /* loop unrolled 8 times */
155  __asm__ volatile (
156  "lw %[temp0], 0(%[in1]) \n\t"
157  "lw %[temp1], 0(%[in2]) \n\t"
158  "lw %[temp2], 8(%[in1]) \n\t"
159  "lw %[temp3], 8(%[in2]) \n\t"
160  "lw %[temp4], 16(%[in1]) \n\t"
161  "lw %[temp5], 16(%[in2]) \n\t"
162  "lw %[temp6], 24(%[in1]) \n\t"
163  "lw %[temp7], 24(%[in2]) \n\t"
164  "addiu %[out1], %[out1], -7164 \n\t"
165  "addiu %[out2], %[out2], -7164 \n\t"
166  "addiu %[in1], %[in1], 32 \n\t"
167  "addiu %[in2], %[in2], 32 \n\t"
168  "sw %[temp0], 7164(%[out1]) \n\t"
169  "sw %[temp1], 7164(%[out2]) \n\t"
170  "sw %[temp2], 7420(%[out1]) \n\t"
171  "sw %[temp3], 7420(%[out2]) \n\t"
172  "sw %[temp4], 7676(%[out1]) \n\t"
173  "sw %[temp5], 7676(%[out2]) \n\t"
174  "sw %[temp6], 7932(%[out1]) \n\t"
175  "sw %[temp7], 7932(%[out2]) \n\t"
176 
177  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
178  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
179  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
180  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
181  [out1]"+r"(out1), [out2]"+r"(out2),
182  [in1]"+r"(in1), [in2]"+r"(in2)
183  :
184  : "memory"
185  );
186  }
187 }
188 
189 #if HAVE_MIPSFPU
190 static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
191 {
192  int i;
193  float temp0, temp1, temp2, temp3, temp4, temp5;
194  float temp6, temp7, temp8, temp9, temp10, temp11;
195  float *src0 = (float*)&src[0][0];
196  float *dst0 = &dst[0];
197 
198  for (i = 0; i < 8; i++) {
199  /* loop unrolled 4 times */
200  __asm__ volatile (
201  "lwc1 %[temp0], 0(%[src0]) \n\t"
202  "lwc1 %[temp1], 4(%[src0]) \n\t"
203  "lwc1 %[temp2], 8(%[src0]) \n\t"
204  "lwc1 %[temp3], 12(%[src0]) \n\t"
205  "lwc1 %[temp4], 16(%[src0]) \n\t"
206  "lwc1 %[temp5], 20(%[src0]) \n\t"
207  "lwc1 %[temp6], 24(%[src0]) \n\t"
208  "lwc1 %[temp7], 28(%[src0]) \n\t"
209  "lwc1 %[temp8], 0(%[dst0]) \n\t"
210  "lwc1 %[temp9], 4(%[dst0]) \n\t"
211  "lwc1 %[temp10], 8(%[dst0]) \n\t"
212  "lwc1 %[temp11], 12(%[dst0]) \n\t"
213  "mul.s %[temp1], %[temp1], %[temp1] \n\t"
214  "mul.s %[temp3], %[temp3], %[temp3] \n\t"
215  "mul.s %[temp5], %[temp5], %[temp5] \n\t"
216  "mul.s %[temp7], %[temp7], %[temp7] \n\t"
217  "madd.s %[temp0], %[temp1], %[temp0], %[temp0] \n\t"
218  "madd.s %[temp2], %[temp3], %[temp2], %[temp2] \n\t"
219  "madd.s %[temp4], %[temp5], %[temp4], %[temp4] \n\t"
220  "madd.s %[temp6], %[temp7], %[temp6], %[temp6] \n\t"
221  "add.s %[temp0], %[temp8], %[temp0] \n\t"
222  "add.s %[temp2], %[temp9], %[temp2] \n\t"
223  "add.s %[temp4], %[temp10], %[temp4] \n\t"
224  "add.s %[temp6], %[temp11], %[temp6] \n\t"
225  "swc1 %[temp0], 0(%[dst0]) \n\t"
226  "swc1 %[temp2], 4(%[dst0]) \n\t"
227  "swc1 %[temp4], 8(%[dst0]) \n\t"
228  "swc1 %[temp6], 12(%[dst0]) \n\t"
229  "addiu %[dst0], %[dst0], 16 \n\t"
230  "addiu %[src0], %[src0], 32 \n\t"
231 
232  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
233  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
234  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
235  [temp9]"=&f"(temp9), [dst0]"+r"(dst0), [src0]"+r"(src0),
236  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
237  :
238  : "memory"
239  );
240  }
241 }
242 
243 static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *src1,
244  int n)
245 {
246  float temp0, temp1, temp2;
247  float *p_d, *p_s0, *p_s1, *end;
248  p_d = &dst[0][0];
249  p_s0 = &src0[0][0];
250  p_s1 = &src1[0];
251  end = p_s1 + n;
252 
253  __asm__ volatile(
254  ".set push \n\t"
255  ".set noreorder \n\t"
256  "1: \n\t"
257  "lwc1 %[temp2], 0(%[p_s1]) \n\t"
258  "lwc1 %[temp0], 0(%[p_s0]) \n\t"
259  "lwc1 %[temp1], 4(%[p_s0]) \n\t"
260  "addiu %[p_d], %[p_d], 8 \n\t"
261  "mul.s %[temp0], %[temp0], %[temp2] \n\t"
262  "mul.s %[temp1], %[temp1], %[temp2] \n\t"
263  "addiu %[p_s0], %[p_s0], 8 \n\t"
264  "swc1 %[temp0], -8(%[p_d]) \n\t"
265  "swc1 %[temp1], -4(%[p_d]) \n\t"
266  "bne %[p_s1], %[end], 1b \n\t"
267  " addiu %[p_s1], %[p_s1], 4 \n\t"
268  ".set pop \n\t"
269 
270  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
271  [temp2]"=&f"(temp2), [p_d]"+r"(p_d),
272  [p_s0]"+r"(p_s0), [p_s1]"+r"(p_s1)
273  : [end]"r"(end)
274  : "memory"
275  );
276 }
277 
278 static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
279  float (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
280  const float phi_fract[2], float (*Q_fract)[2],
281  const float *transient_gain,
282  float g_decay_slope,
283  int len)
284 {
285  float *p_delay = &delay[0][0];
286  float *p_out = &out[0][0];
287  float *p_ap_delay = &ap_delay[0][0][0];
288  float *p_t_gain = (float*)transient_gain;
289  float *p_Q_fract = &Q_fract[0][0];
290  float ag0, ag1, ag2;
291  float phi_fract0 = phi_fract[0];
292  float phi_fract1 = phi_fract[1];
293  float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
294 
295  len = (int)((int*)p_delay + (len << 1));
296 
297  /* merged 2 loops */
298  __asm__ volatile(
299  ".set push \n\t"
300  ".set noreorder \n\t"
301  "li.s %[ag0], 0.65143905753106 \n\t"
302  "li.s %[ag1], 0.56471812200776 \n\t"
303  "li.s %[ag2], 0.48954165955695 \n\t"
304  "mul.s %[ag0], %[ag0], %[g_decay_slope] \n\t"
305  "mul.s %[ag1], %[ag1], %[g_decay_slope] \n\t"
306  "mul.s %[ag2], %[ag2], %[g_decay_slope] \n\t"
307  "1: \n\t"
308  "lwc1 %[temp0], 0(%[p_delay]) \n\t"
309  "lwc1 %[temp1], 4(%[p_delay]) \n\t"
310  "lwc1 %[temp4], 16(%[p_ap_delay]) \n\t"
311  "lwc1 %[temp5], 20(%[p_ap_delay]) \n\t"
312  "mul.s %[temp3], %[temp0], %[phi_fract1] \n\t"
313  "lwc1 %[temp6], 0(%[p_Q_fract]) \n\t"
314  "mul.s %[temp2], %[temp1], %[phi_fract1] \n\t"
315  "lwc1 %[temp7], 4(%[p_Q_fract]) \n\t"
316  "madd.s %[temp3], %[temp3], %[temp1], %[phi_fract0] \n\t"
317  "msub.s %[temp2], %[temp2], %[temp0], %[phi_fract0] \n\t"
318  "mul.s %[temp8], %[temp5], %[temp7] \n\t"
319  "mul.s %[temp9], %[temp4], %[temp7] \n\t"
320  "lwc1 %[temp7], 12(%[p_Q_fract]) \n\t"
321  "mul.s %[temp0], %[ag0], %[temp2] \n\t"
322  "mul.s %[temp1], %[ag0], %[temp3] \n\t"
323  "msub.s %[temp8], %[temp8], %[temp4], %[temp6] \n\t"
324  "lwc1 %[temp4], 304(%[p_ap_delay]) \n\t"
325  "madd.s %[temp9], %[temp9], %[temp5], %[temp6] \n\t"
326  "lwc1 %[temp5], 308(%[p_ap_delay]) \n\t"
327  "sub.s %[temp0], %[temp8], %[temp0] \n\t"
328  "sub.s %[temp1], %[temp9], %[temp1] \n\t"
329  "madd.s %[temp2], %[temp2], %[ag0], %[temp0] \n\t"
330  "lwc1 %[temp6], 8(%[p_Q_fract]) \n\t"
331  "madd.s %[temp3], %[temp3], %[ag0], %[temp1] \n\t"
332  "mul.s %[temp8], %[temp5], %[temp7] \n\t"
333  "mul.s %[temp9], %[temp4], %[temp7] \n\t"
334  "lwc1 %[temp7], 20(%[p_Q_fract]) \n\t"
335  "msub.s %[temp8], %[temp8], %[temp4], %[temp6] \n\t"
336  "swc1 %[temp2], 40(%[p_ap_delay]) \n\t"
337  "mul.s %[temp2], %[ag1], %[temp0] \n\t"
338  "swc1 %[temp3], 44(%[p_ap_delay]) \n\t"
339  "mul.s %[temp3], %[ag1], %[temp1] \n\t"
340  "lwc1 %[temp4], 592(%[p_ap_delay]) \n\t"
341  "madd.s %[temp9], %[temp9], %[temp5], %[temp6] \n\t"
342  "lwc1 %[temp5], 596(%[p_ap_delay]) \n\t"
343  "sub.s %[temp2], %[temp8], %[temp2] \n\t"
344  "sub.s %[temp3], %[temp9], %[temp3] \n\t"
345  "lwc1 %[temp6], 16(%[p_Q_fract]) \n\t"
346  "madd.s %[temp0], %[temp0], %[ag1], %[temp2] \n\t"
347  "madd.s %[temp1], %[temp1], %[ag1], %[temp3] \n\t"
348  "mul.s %[temp8], %[temp5], %[temp7] \n\t"
349  "mul.s %[temp9], %[temp4], %[temp7] \n\t"
350  "msub.s %[temp8], %[temp8], %[temp4], %[temp6] \n\t"
351  "madd.s %[temp9], %[temp9], %[temp5], %[temp6] \n\t"
352  "swc1 %[temp0], 336(%[p_ap_delay]) \n\t"
353  "mul.s %[temp0], %[ag2], %[temp2] \n\t"
354  "swc1 %[temp1], 340(%[p_ap_delay]) \n\t"
355  "mul.s %[temp1], %[ag2], %[temp3] \n\t"
356  "lwc1 %[temp4], 0(%[p_t_gain]) \n\t"
357  "sub.s %[temp0], %[temp8], %[temp0] \n\t"
358  "addiu %[p_ap_delay], %[p_ap_delay], 8 \n\t"
359  "sub.s %[temp1], %[temp9], %[temp1] \n\t"
360  "addiu %[p_t_gain], %[p_t_gain], 4 \n\t"
361  "madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t"
362  "addiu %[p_delay], %[p_delay], 8 \n\t"
363  "madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t"
364  "addiu %[p_out], %[p_out], 8 \n\t"
365  "mul.s %[temp5], %[temp4], %[temp0] \n\t"
366  "mul.s %[temp6], %[temp4], %[temp1] \n\t"
367  "swc1 %[temp2], 624(%[p_ap_delay]) \n\t"
368  "swc1 %[temp3], 628(%[p_ap_delay]) \n\t"
369  "swc1 %[temp5], -8(%[p_out]) \n\t"
370  "swc1 %[temp6], -4(%[p_out]) \n\t"
371  "bne %[p_delay], %[len], 1b \n\t"
372  " swc1 %[temp6], -4(%[p_out]) \n\t"
373  ".set pop \n\t"
374 
375  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
376  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
377  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
378  [temp9]"=&f"(temp9), [p_delay]"+r"(p_delay), [p_ap_delay]"+r"(p_ap_delay),
379  [p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out),
380  [ag0]"=&f"(ag0), [ag1]"=&f"(ag1), [ag2]"=&f"(ag2)
381  : [phi_fract0]"f"(phi_fract0), [phi_fract1]"f"(phi_fract1),
382  [len]"r"(len), [g_decay_slope]"f"(g_decay_slope)
383  : "memory"
384  );
385 }
386 
387 static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
388  float h[2][4], float h_step[2][4],
389  int len)
390 {
391  float h0 = h[0][0];
392  float h1 = h[0][1];
393  float h2 = h[0][2];
394  float h3 = h[0][3];
395  float hs0 = h_step[0][0];
396  float hs1 = h_step[0][1];
397  float hs2 = h_step[0][2];
398  float hs3 = h_step[0][3];
399  float temp0, temp1, temp2, temp3;
400  float l_re, l_im, r_re, r_im;
401 
402  len = (int)((int*)l + (len << 1));
403 
404  __asm__ volatile(
405  ".set push \n\t"
406  ".set noreorder \n\t"
407  "1: \n\t"
408  "add.s %[h0], %[h0], %[hs0] \n\t"
409  "lwc1 %[l_re], 0(%[l]) \n\t"
410  "add.s %[h1], %[h1], %[hs1] \n\t"
411  "lwc1 %[r_re], 0(%[r]) \n\t"
412  "add.s %[h2], %[h2], %[hs2] \n\t"
413  "lwc1 %[l_im], 4(%[l]) \n\t"
414  "add.s %[h3], %[h3], %[hs3] \n\t"
415  "lwc1 %[r_im], 4(%[r]) \n\t"
416  "mul.s %[temp0], %[h0], %[l_re] \n\t"
417  "addiu %[l], %[l], 8 \n\t"
418  "mul.s %[temp2], %[h1], %[l_re] \n\t"
419  "addiu %[r], %[r], 8 \n\t"
420  "madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t"
421  "madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t"
422  "mul.s %[temp1], %[h0], %[l_im] \n\t"
423  "mul.s %[temp3], %[h1], %[l_im] \n\t"
424  "madd.s %[temp1], %[temp1], %[h2], %[r_im] \n\t"
425  "madd.s %[temp3], %[temp3], %[h3], %[r_im] \n\t"
426  "swc1 %[temp0], -8(%[l]) \n\t"
427  "swc1 %[temp2], -8(%[r]) \n\t"
428  "swc1 %[temp1], -4(%[l]) \n\t"
429  "bne %[l], %[len], 1b \n\t"
430  " swc1 %[temp3], -4(%[r]) \n\t"
431  ".set pop \n\t"
432 
433  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
434  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
435  [h0]"+f"(h0), [h1]"+f"(h1), [h2]"+f"(h2),
436  [h3]"+f"(h3), [l]"+r"(l), [r]"+r"(r),
437  [l_re]"=&f"(l_re), [l_im]"=&f"(l_im),
438  [r_re]"=&f"(r_re), [r_im]"=&f"(r_im)
439  : [hs0]"f"(hs0), [hs1]"f"(hs1), [hs2]"f"(hs2),
440  [hs3]"f"(hs3), [len]"r"(len)
441  : "memory"
442  );
443 }
444 #endif /* HAVE_MIPSFPU */
445 #endif /* HAVE_INLINE_ASM */
446 
448 {
449 #if HAVE_INLINE_ASM
450  s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_mips;
451  s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_mips;
452 #if HAVE_MIPSFPU
453  s->add_squares = ps_add_squares_mips;
454  s->mul_pair_single = ps_mul_pair_single_mips;
455  s->decorrelate = ps_decorrelate_mips;
456  s->stereo_interpolate[0] = ps_stereo_interpolate_mips;
457 #endif /* HAVE_MIPSFPU */
458 #endif /* HAVE_INLINE_ASM */
459 }
const char * s
Definition: avisynth_c.h:668
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
Definition: git-howto.txt:5
F H1 F F H1 F F F F H1<-F-------F-------F v v v H2 H3 H2^^^F-------F-------F-> H1<-F-------F-------F|||||||||F H1 F|||||||||F H1 Funavailable fullpel samples(outside the picture for example) shall be equalto the closest available fullpel sampleSmaller pel interpolation:--------------------------if diag_mc is set then points which lie on a line between 2 vertically, horiziontally or diagonally adjacent halfpel points shall be interpolatedlinearls with rounding to nearest and halfway values rounded up.points which lie on 2 diagonals at the same time should only use the onediagonal not containing the fullpel point F--> O q O<--h1-> O q O<--F v\/v\/v O O O O O O O|/|\|q q q q q|/|\|O O O O O O O^/\^/\^h2--> O q O<--h3-> O q O<--h2 v\/v\/v O O O O O O O|\|/|q q q q q|\|/|O O O O O O O^/\^/\^F--> O q O<--h1-> O q O<--Fthe remaining points shall be bilinearly interpolated from theup to 4 surrounding halfpel and fullpel points, again rounding should be tonearest and halfway values rounded upcompliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chromainterpolation at leastOverlapped block motion compensation:-------------------------------------FIXMELL band prediction:===================Each sample in the LL0 subband is predicted by the median of the left, top andleft+top-topleft samples, samples outside the subband shall be considered tobe 0.To reverse this prediction in the decoder apply the following.for(y=0;y< height;y++){for(x=0;x< width;x++){sample[y][x]+=median(sample[y-1][x], sample[y][x-1], sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);}}sample[-1][*]=sample[*][-1]=0;width, height here are the width and height of the LL0 subband not of the finalvideoDequantizaton:==============FIXMEWavelet Transform:==================Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integertransform and a integer approximation of the symmetric biorthogonal 9/7daubechies wavelet.2D IDWT(inverse discrete wavelet transform)--------------------------------------------The 2D IDWT applies a 2D filter recursively, each time combining the4 lowest frequency subbands into a single subband until only 1 subbandremains.The 2D filter is done by first applying a 1D filter in the vertical directionand then applying it in the horizontal one.------------------------------------------------------------|LL0|HL0|||||||||||||---+---|HL1||L0|H0|HL1||LL1|HL1|||||LH0|HH0|||||||||||||-------+-------|-> L1 H1 LH1 HH1 LH1 HH1 LH1 HH1 L1
Definition: snow.txt:546
void(* stereo_interpolate[2])(float(*l)[2], float(*r)[2], float h[2][4], float h_step[2][4], int len)
Definition: aacpsdsp.h:45
#define PS_MAX_AP_DELAY
Definition: aacps.h:39
void(* mul_pair_single)(float(*dst)[2], float(*src0)[2], float *src1, int n)
Definition: aacpsdsp.h:30
end end
void(* hybrid_synthesis_deint)(float out[2][38][64], float(*in)[32][2], int i, int len)
Definition: aacpsdsp.h:37
const char * r
Definition: vf_curves.c:94
#define L(x)
void(* hybrid_analysis_ileave)(float(*out)[32][2], float L[2][38][64], int i, int len)
Definition: aacpsdsp.h:35
AVS_Value src
Definition: avisynth_c.h:523
void ff_psdsp_init_mips(PSDSPContext *s)
synthesis window for stochastic i
void(* decorrelate)(float(*out)[2], float(*delay)[2], float(*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2], const float phi_fract[2], float(*Q_fract)[2], const float *transient_gain, float g_decay_slope, int len)
Definition: aacpsdsp.h:39
void(* add_squares)(float *dst, const float(*src)[2], int n)
Definition: aacpsdsp.h:29
#define PS_QMF_TIME_SLOTS
Definition: aacps.h:36
int len
else dst[i][x+y *dst_stride[i]]
Definition: vf_mcdeint.c:160
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=av_sample_fmt_is_planar(in_fmt);out_planar=av_sample_fmt_is_planar(out_fmt);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_dlog(ac->avr,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> out