yading@10
|
/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Author:  Stanislav Ocovaj (socovaj@mips.com)
 *          Szabolcs Pal     (sabolc@mips.com)
 *
 * AAC coefficients encoder optimized for MIPS floating-point architecture
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
yading@10
|
50
|
yading@10
|
/**
 * @file
 * Reference: libavcodec/aaccoder.c
 */
|
yading@10
|
55
|
yading@10
|
56 #include "libavutil/libm.h"
|
yading@10
|
57
|
yading@10
|
58 #include <float.h>
|
yading@10
|
59 #include "libavutil/mathematics.h"
|
yading@10
|
60 #include "libavcodec/avcodec.h"
|
yading@10
|
61 #include "libavcodec/put_bits.h"
|
yading@10
|
62 #include "libavcodec/aac.h"
|
yading@10
|
63 #include "libavcodec/aacenc.h"
|
yading@10
|
64 #include "libavcodec/aactab.h"
|
yading@10
|
65
|
yading@10
|
66 #if HAVE_INLINE_ASM
|
yading@10
|
/**
 * One node of a band coding path.
 *
 * NOTE(review): no code visible in this part of the file reads or writes
 * this structure; the field descriptions below are inferred from the field
 * names only and should be confirmed against libavcodec/aaccoder.c.
 */
typedef struct BandCodingPath {
    int   prev_idx; /* presumably the index of the previous node on the path */
    float cost;     /* presumably the accumulated cost of the path           */
    int   run;      /* presumably the run length associated with the node    */
} BandCodingPath;
|
yading@10
|
72
|
yading@10
|
/**
 * 64-entry lookup table: entries 0..30 hold 5, entries 31..62 hold 10 and
 * entry 63 holds 15.
 *
 * NOTE(review): the name suggests "bits needed to code a run value for long
 * windows"; nothing in the visible part of the file indexes it, so confirm
 * the index meaning against the caller.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};
|
yading@10
|
79
|
yading@10
|
/**
 * 16-entry lookup table: entries 0..6 hold 3, entries 7..14 hold 6 and
 * entry 15 holds 9.
 *
 * NOTE(review): the name suggests "bits needed to code a run value for short
 * windows"; nothing in the visible part of the file indexes it, so confirm
 * the index meaning against the caller.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};
|
yading@10
|
83
|
yading@10
|
84 static const uint8_t *run_value_bits[2] = {
|
yading@10
|
85 run_value_bits_long, run_value_bits_short
|
yading@10
|
86 };
|
yading@10
|
87
|
yading@10
|
/**
 * 81-entry (3^4) lookup table.
 *
 * Writing the index as four base-3 digits, each entry equals the number of
 * non-zero digits (0..4).
 *
 * NOTE(review): going by the name, this is the number of sign bits that
 * accompany an unsigned-quad codeword; not referenced in the visible part of
 * the file.
 */
static const uint8_t uquad_sign_bits[81] = {
    0, 1, 1, 1, 2, 2, 1, 2, 2,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    2, 3, 3, 3, 4, 4, 3, 4, 4
};
|
yading@10
|
99
|
yading@10
|
/**
 * 64-entry (8 x 8) lookup table.
 *
 * For index i, the entry equals (i / 8 != 0) + (i % 8 != 0), i.e. how many
 * of the two components encoded in the index are non-zero.
 *
 * NOTE(review): going by the name, this is the number of sign bits that
 * accompany an unsigned-pair codeword with maximum magnitude 7; not
 * referenced in the visible part of the file.
 */
static const uint8_t upair7_sign_bits[64] = {
    0, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
};
|
yading@10
|
110
|
yading@10
|
/**
 * 169-entry (13 x 13) lookup table.
 *
 * For index i, the entry equals (i / 13 != 0) + (i % 13 != 0), i.e. how many
 * of the two components encoded in the index are non-zero.
 *
 * NOTE(review): going by the name, this is the number of sign bits that
 * accompany an unsigned-pair codeword with maximum magnitude 12; not
 * referenced in the visible part of the file.
 */
static const uint8_t upair12_sign_bits[169] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
|
yading@10
|
126
|
yading@10
|
/**
 * 289-entry (17 x 17) lookup table.
 *
 * For index i, the entry equals (i / 17 != 0) + (i % 17 != 0), i.e. how many
 * of the two components encoded in the index are non-zero.
 *
 * NOTE(review): going by the name, this is the number of sign bits that
 * accompany a codeword of the escape codebook; not referenced in the visible
 * part of the file.
 */
static const uint8_t esc_sign_bits[289] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
|
yading@10
|
146
|
yading@10
|
147 static void abs_pow34_v(float *out, const float *in, const int size) {
|
yading@10
|
148 #ifndef USE_REALLY_FULL_SEARCH
|
yading@10
|
149 int i;
|
yading@10
|
150 float a, b, c, d;
|
yading@10
|
151 float ax, bx, cx, dx;
|
yading@10
|
152
|
yading@10
|
153 for (i = 0; i < size; i += 4) {
|
yading@10
|
154 a = fabsf(in[i ]);
|
yading@10
|
155 b = fabsf(in[i+1]);
|
yading@10
|
156 c = fabsf(in[i+2]);
|
yading@10
|
157 d = fabsf(in[i+3]);
|
yading@10
|
158
|
yading@10
|
159 ax = sqrtf(a);
|
yading@10
|
160 bx = sqrtf(b);
|
yading@10
|
161 cx = sqrtf(c);
|
yading@10
|
162 dx = sqrtf(d);
|
yading@10
|
163
|
yading@10
|
164 a = a * ax;
|
yading@10
|
165 b = b * bx;
|
yading@10
|
166 c = c * cx;
|
yading@10
|
167 d = d * dx;
|
yading@10
|
168
|
yading@10
|
169 out[i ] = sqrtf(a);
|
yading@10
|
170 out[i+1] = sqrtf(b);
|
yading@10
|
171 out[i+2] = sqrtf(c);
|
yading@10
|
172 out[i+3] = sqrtf(d);
|
yading@10
|
173 }
|
yading@10
|
174 #endif /* USE_REALLY_FULL_SEARCH */
|
yading@10
|
175 }
|
yading@10
|
176
|
yading@10
|
/**
 * Return the largest value found in the first swb_size entries of each of
 * group_len consecutive windows of scaled[].
 *
 * Window w2 starts at scaled[w2 * 128]. The running maximum starts at 0.0f,
 * so the result is never below zero for ordinary (non-NaN) input, and it is
 * 0.0f when group_len or swb_size is not positive.
 *
 * @param group_len number of windows to scan
 * @param swb_size  number of coefficients scanned per window
 * @param scaled    coefficient array, windows spaced 128 floats apart
 * @return the maximum value seen, or 0.0f if nothing larger was found
 */
static float find_max_val(int group_len, int swb_size, const float *scaled) {
    float maxval = 0.0f;
    int w2, i;

    for (w2 = 0; w2 < group_len; w2++) {
        const float *win = scaled + w2 * 128;

        for (i = 0; i < swb_size; i++) {
            /* Same selection as FFMAX(maxval, win[i]). */
            maxval = (maxval > win[i]) ? maxval : win[i];
        }
    }
    return maxval;
}
|
yading@10
|
187
|
yading@10
|
188 static int find_min_book(float maxval, int sf) {
|
yading@10
|
189 float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
190 float Q34 = sqrtf(Q * sqrtf(Q));
|
yading@10
|
191 int qmaxval, cb;
|
yading@10
|
192 qmaxval = maxval * Q34 + 0.4054f;
|
yading@10
|
193 if (qmaxval == 0) cb = 0;
|
yading@10
|
194 else if (qmaxval == 1) cb = 1;
|
yading@10
|
195 else if (qmaxval == 2) cb = 3;
|
yading@10
|
196 else if (qmaxval <= 4) cb = 5;
|
yading@10
|
197 else if (qmaxval <= 7) cb = 7;
|
yading@10
|
198 else if (qmaxval <= 12) cb = 9;
|
yading@10
|
199 else cb = 11;
|
yading@10
|
200 return cb;
|
yading@10
|
201 }
|
yading@10
|
202
|
yading@10
|
/**
 * Functions derived from the template function and optimized for quantizing
 * and encoding a band
 */
|
yading@10
|
206 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
|
yading@10
|
207 PutBitContext *pb, const float *in,
|
yading@10
|
208 const float *scaled, int size, int scale_idx,
|
yading@10
|
209 int cb, const float lambda, const float uplim,
|
yading@10
|
210 int *bits)
|
yading@10
|
211 {
|
yading@10
|
212 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
213 int i;
|
yading@10
|
214 int qc1, qc2, qc3, qc4;
|
yading@10
|
215
|
yading@10
|
216 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
217 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
|
yading@10
|
218
|
yading@10
|
219 abs_pow34_v(s->scoefs, in, size);
|
yading@10
|
220 scaled = s->scoefs;
|
yading@10
|
221 for (i = 0; i < size; i += 4) {
|
yading@10
|
222 int curidx;
|
yading@10
|
223 int *in_int = (int *)&in[i];
|
yading@10
|
224
|
yading@10
|
225 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
226 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
227 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
228 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
229
|
yading@10
|
230 __asm__ volatile (
|
yading@10
|
231 ".set push \n\t"
|
yading@10
|
232 ".set noreorder \n\t"
|
yading@10
|
233
|
yading@10
|
234 "slt %[qc1], $zero, %[qc1] \n\t"
|
yading@10
|
235 "slt %[qc2], $zero, %[qc2] \n\t"
|
yading@10
|
236 "slt %[qc3], $zero, %[qc3] \n\t"
|
yading@10
|
237 "slt %[qc4], $zero, %[qc4] \n\t"
|
yading@10
|
238 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
239 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
240 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
241 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
242 "srl $t0, $t0, 31 \n\t"
|
yading@10
|
243 "srl $t1, $t1, 31 \n\t"
|
yading@10
|
244 "srl $t2, $t2, 31 \n\t"
|
yading@10
|
245 "srl $t3, $t3, 31 \n\t"
|
yading@10
|
246 "subu $t4, $zero, %[qc1] \n\t"
|
yading@10
|
247 "subu $t5, $zero, %[qc2] \n\t"
|
yading@10
|
248 "subu $t6, $zero, %[qc3] \n\t"
|
yading@10
|
249 "subu $t7, $zero, %[qc4] \n\t"
|
yading@10
|
250 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
251 "movn %[qc2], $t5, $t1 \n\t"
|
yading@10
|
252 "movn %[qc3], $t6, $t2 \n\t"
|
yading@10
|
253 "movn %[qc4], $t7, $t3 \n\t"
|
yading@10
|
254
|
yading@10
|
255 ".set pop \n\t"
|
yading@10
|
256
|
yading@10
|
257 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
258 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
259 : [in_int]"r"(in_int)
|
yading@10
|
260 : "t0", "t1", "t2", "t3",
|
yading@10
|
261 "t4", "t5", "t6", "t7",
|
yading@10
|
262 "memory"
|
yading@10
|
263 );
|
yading@10
|
264
|
yading@10
|
265 curidx = qc1;
|
yading@10
|
266 curidx *= 3;
|
yading@10
|
267 curidx += qc2;
|
yading@10
|
268 curidx *= 3;
|
yading@10
|
269 curidx += qc3;
|
yading@10
|
270 curidx *= 3;
|
yading@10
|
271 curidx += qc4;
|
yading@10
|
272 curidx += 40;
|
yading@10
|
273
|
yading@10
|
274 put_bits(pb, p_bits[curidx], p_codes[curidx]);
|
yading@10
|
275 }
|
yading@10
|
276 }
|
yading@10
|
277
|
yading@10
|
278 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
|
yading@10
|
279 PutBitContext *pb, const float *in,
|
yading@10
|
280 const float *scaled, int size, int scale_idx,
|
yading@10
|
281 int cb, const float lambda, const float uplim,
|
yading@10
|
282 int *bits)
|
yading@10
|
283 {
|
yading@10
|
284 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
285 int i;
|
yading@10
|
286 int qc1, qc2, qc3, qc4;
|
yading@10
|
287
|
yading@10
|
288 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
289 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
|
yading@10
|
290
|
yading@10
|
291 abs_pow34_v(s->scoefs, in, size);
|
yading@10
|
292 scaled = s->scoefs;
|
yading@10
|
293 for (i = 0; i < size; i += 4) {
|
yading@10
|
294 int curidx, sign, count;
|
yading@10
|
295 int *in_int = (int *)&in[i];
|
yading@10
|
296 uint8_t v_bits;
|
yading@10
|
297 unsigned int v_codes;
|
yading@10
|
298
|
yading@10
|
299 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
300 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
301 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
302 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
303
|
yading@10
|
304 __asm__ volatile (
|
yading@10
|
305 ".set push \n\t"
|
yading@10
|
306 ".set noreorder \n\t"
|
yading@10
|
307
|
yading@10
|
308 "ori $t4, $zero, 2 \n\t"
|
yading@10
|
309 "ori %[sign], $zero, 0 \n\t"
|
yading@10
|
310 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
311 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
312 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
313 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
314 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
315 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
316 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
317 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
318 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
319 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
320 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
321 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
322 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
323 "movn %[sign], $t0, %[qc1] \n\t"
|
yading@10
|
324 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
325 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
326 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
327 "sll $t0, %[sign], 1 \n\t"
|
yading@10
|
328 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
329 "movn %[sign], $t0, %[qc2] \n\t"
|
yading@10
|
330 "slt $t4, $zero, %[qc1] \n\t"
|
yading@10
|
331 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
332 "slt %[count], $zero, %[qc3] \n\t"
|
yading@10
|
333 "sll $t0, %[sign], 1 \n\t"
|
yading@10
|
334 "or $t0, $t0, $t2 \n\t"
|
yading@10
|
335 "movn %[sign], $t0, %[qc3] \n\t"
|
yading@10
|
336 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
337 "addu %[count], %[count], $t4 \n\t"
|
yading@10
|
338 "addu %[count], %[count], $t1 \n\t"
|
yading@10
|
339 "sll $t0, %[sign], 1 \n\t"
|
yading@10
|
340 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
341 "movn %[sign], $t0, %[qc4] \n\t"
|
yading@10
|
342 "addu %[count], %[count], $t2 \n\t"
|
yading@10
|
343
|
yading@10
|
344 ".set pop \n\t"
|
yading@10
|
345
|
yading@10
|
346 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
347 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
348 [sign]"=&r"(sign), [count]"=&r"(count)
|
yading@10
|
349 : [in_int]"r"(in_int)
|
yading@10
|
350 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
351 "memory"
|
yading@10
|
352 );
|
yading@10
|
353
|
yading@10
|
354 curidx = qc1;
|
yading@10
|
355 curidx *= 3;
|
yading@10
|
356 curidx += qc2;
|
yading@10
|
357 curidx *= 3;
|
yading@10
|
358 curidx += qc3;
|
yading@10
|
359 curidx *= 3;
|
yading@10
|
360 curidx += qc4;
|
yading@10
|
361
|
yading@10
|
362 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
|
yading@10
|
363 v_bits = p_bits[curidx] + count;
|
yading@10
|
364 put_bits(pb, v_bits, v_codes);
|
yading@10
|
365 }
|
yading@10
|
366 }
|
yading@10
|
367
|
yading@10
|
368 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
|
yading@10
|
369 PutBitContext *pb, const float *in,
|
yading@10
|
370 const float *scaled, int size, int scale_idx,
|
yading@10
|
371 int cb, const float lambda, const float uplim,
|
yading@10
|
372 int *bits)
|
yading@10
|
373 {
|
yading@10
|
374 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
375 int i;
|
yading@10
|
376 int qc1, qc2, qc3, qc4;
|
yading@10
|
377
|
yading@10
|
378 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
379 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
|
yading@10
|
380
|
yading@10
|
381 abs_pow34_v(s->scoefs, in, size);
|
yading@10
|
382 scaled = s->scoefs;
|
yading@10
|
383 for (i = 0; i < size; i += 4) {
|
yading@10
|
384 int curidx, curidx2;
|
yading@10
|
385 int *in_int = (int *)&in[i];
|
yading@10
|
386 uint8_t v_bits;
|
yading@10
|
387 unsigned int v_codes;
|
yading@10
|
388
|
yading@10
|
389 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
390 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
391 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
392 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
393
|
yading@10
|
394 __asm__ volatile (
|
yading@10
|
395 ".set push \n\t"
|
yading@10
|
396 ".set noreorder \n\t"
|
yading@10
|
397
|
yading@10
|
398 "ori $t4, $zero, 4 \n\t"
|
yading@10
|
399 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
400 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
401 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
402 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
403 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
404 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
405 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
406 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
407 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
408 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
409 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
410 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
411 "srl $t0, $t0, 31 \n\t"
|
yading@10
|
412 "srl $t1, $t1, 31 \n\t"
|
yading@10
|
413 "srl $t2, $t2, 31 \n\t"
|
yading@10
|
414 "srl $t3, $t3, 31 \n\t"
|
yading@10
|
415 "subu $t4, $zero, %[qc1] \n\t"
|
yading@10
|
416 "subu $t5, $zero, %[qc2] \n\t"
|
yading@10
|
417 "subu $t6, $zero, %[qc3] \n\t"
|
yading@10
|
418 "subu $t7, $zero, %[qc4] \n\t"
|
yading@10
|
419 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
420 "movn %[qc2], $t5, $t1 \n\t"
|
yading@10
|
421 "movn %[qc3], $t6, $t2 \n\t"
|
yading@10
|
422 "movn %[qc4], $t7, $t3 \n\t"
|
yading@10
|
423
|
yading@10
|
424 ".set pop \n\t"
|
yading@10
|
425
|
yading@10
|
426 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
427 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
428 : [in_int]"r"(in_int)
|
yading@10
|
429 : "t0", "t1", "t2", "t3",
|
yading@10
|
430 "t4", "t5", "t6", "t7",
|
yading@10
|
431 "memory"
|
yading@10
|
432 );
|
yading@10
|
433
|
yading@10
|
434 curidx = 9 * qc1;
|
yading@10
|
435 curidx += qc2 + 40;
|
yading@10
|
436
|
yading@10
|
437 curidx2 = 9 * qc3;
|
yading@10
|
438 curidx2 += qc4 + 40;
|
yading@10
|
439
|
yading@10
|
440 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
|
yading@10
|
441 v_bits = p_bits[curidx] + p_bits[curidx2];
|
yading@10
|
442 put_bits(pb, v_bits, v_codes);
|
yading@10
|
443 }
|
yading@10
|
444 }
|
yading@10
|
445
|
yading@10
|
446 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
|
yading@10
|
447 PutBitContext *pb, const float *in,
|
yading@10
|
448 const float *scaled, int size, int scale_idx,
|
yading@10
|
449 int cb, const float lambda, const float uplim,
|
yading@10
|
450 int *bits)
|
yading@10
|
451 {
|
yading@10
|
452 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
453 int i;
|
yading@10
|
454 int qc1, qc2, qc3, qc4;
|
yading@10
|
455
|
yading@10
|
456 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
|
yading@10
|
457 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
|
yading@10
|
458
|
yading@10
|
459 abs_pow34_v(s->scoefs, in, size);
|
yading@10
|
460 scaled = s->scoefs;
|
yading@10
|
461 for (i = 0; i < size; i += 4) {
|
yading@10
|
462 int curidx, sign1, count1, sign2, count2;
|
yading@10
|
463 int *in_int = (int *)&in[i];
|
yading@10
|
464 uint8_t v_bits;
|
yading@10
|
465 unsigned int v_codes;
|
yading@10
|
466
|
yading@10
|
467 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
468 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
469 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
470 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
471
|
yading@10
|
472 __asm__ volatile (
|
yading@10
|
473 ".set push \n\t"
|
yading@10
|
474 ".set noreorder \n\t"
|
yading@10
|
475
|
yading@10
|
476 "ori $t4, $zero, 7 \n\t"
|
yading@10
|
477 "ori %[sign1], $zero, 0 \n\t"
|
yading@10
|
478 "ori %[sign2], $zero, 0 \n\t"
|
yading@10
|
479 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
480 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
481 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
482 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
483 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
484 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
485 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
486 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
487 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
488 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
489 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
490 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
491 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
492 "movn %[sign1], $t0, %[qc1] \n\t"
|
yading@10
|
493 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
494 "movn %[sign2], $t2, %[qc3] \n\t"
|
yading@10
|
495 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
496 "sll $t0, %[sign1], 1 \n\t"
|
yading@10
|
497 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
498 "movn %[sign1], $t0, %[qc2] \n\t"
|
yading@10
|
499 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
500 "sll $t0, %[sign2], 1 \n\t"
|
yading@10
|
501 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
502 "movn %[sign2], $t0, %[qc4] \n\t"
|
yading@10
|
503 "slt %[count1], $zero, %[qc1] \n\t"
|
yading@10
|
504 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
505 "slt %[count2], $zero, %[qc3] \n\t"
|
yading@10
|
506 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
507 "addu %[count1], %[count1], $t1 \n\t"
|
yading@10
|
508 "addu %[count2], %[count2], $t2 \n\t"
|
yading@10
|
509
|
yading@10
|
510 ".set pop \n\t"
|
yading@10
|
511
|
yading@10
|
512 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
513 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
514 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
|
yading@10
|
515 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
|
yading@10
|
516 : [in_int]"r"(in_int)
|
yading@10
|
517 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
518 "memory"
|
yading@10
|
519 );
|
yading@10
|
520
|
yading@10
|
521 curidx = 8 * qc1;
|
yading@10
|
522 curidx += qc2;
|
yading@10
|
523
|
yading@10
|
524 v_codes = (p_codes[curidx] << count1) | sign1;
|
yading@10
|
525 v_bits = p_bits[curidx] + count1;
|
yading@10
|
526 put_bits(pb, v_bits, v_codes);
|
yading@10
|
527
|
yading@10
|
528 curidx = 8 * qc3;
|
yading@10
|
529 curidx += qc4;
|
yading@10
|
530
|
yading@10
|
531 v_codes = (p_codes[curidx] << count2) | sign2;
|
yading@10
|
532 v_bits = p_bits[curidx] + count2;
|
yading@10
|
533 put_bits(pb, v_bits, v_codes);
|
yading@10
|
534 }
|
yading@10
|
535 }
|
yading@10
|
536
|
yading@10
|
537 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
|
yading@10
|
538 PutBitContext *pb, const float *in,
|
yading@10
|
539 const float *scaled, int size, int scale_idx,
|
yading@10
|
540 int cb, const float lambda, const float uplim,
|
yading@10
|
541 int *bits)
|
yading@10
|
542 {
|
yading@10
|
543 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
544 int i;
|
yading@10
|
545 int qc1, qc2, qc3, qc4;
|
yading@10
|
546
|
yading@10
|
547 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
|
yading@10
|
548 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
|
yading@10
|
549
|
yading@10
|
550 abs_pow34_v(s->scoefs, in, size);
|
yading@10
|
551 scaled = s->scoefs;
|
yading@10
|
552 for (i = 0; i < size; i += 4) {
|
yading@10
|
553 int curidx, sign1, count1, sign2, count2;
|
yading@10
|
554 int *in_int = (int *)&in[i];
|
yading@10
|
555 uint8_t v_bits;
|
yading@10
|
556 unsigned int v_codes;
|
yading@10
|
557
|
yading@10
|
558 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
559 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
560 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
561 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
562
|
yading@10
|
563 __asm__ volatile (
|
yading@10
|
564 ".set push \n\t"
|
yading@10
|
565 ".set noreorder \n\t"
|
yading@10
|
566
|
yading@10
|
567 "ori $t4, $zero, 12 \n\t"
|
yading@10
|
568 "ori %[sign1], $zero, 0 \n\t"
|
yading@10
|
569 "ori %[sign2], $zero, 0 \n\t"
|
yading@10
|
570 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
571 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
572 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
573 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
574 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
575 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
576 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
577 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
578 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
579 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
580 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
581 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
582 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
583 "movn %[sign1], $t0, %[qc1] \n\t"
|
yading@10
|
584 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
585 "movn %[sign2], $t2, %[qc3] \n\t"
|
yading@10
|
586 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
587 "sll $t0, %[sign1], 1 \n\t"
|
yading@10
|
588 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
589 "movn %[sign1], $t0, %[qc2] \n\t"
|
yading@10
|
590 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
591 "sll $t0, %[sign2], 1 \n\t"
|
yading@10
|
592 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
593 "movn %[sign2], $t0, %[qc4] \n\t"
|
yading@10
|
594 "slt %[count1], $zero, %[qc1] \n\t"
|
yading@10
|
595 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
596 "slt %[count2], $zero, %[qc3] \n\t"
|
yading@10
|
597 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
598 "addu %[count1], %[count1], $t1 \n\t"
|
yading@10
|
599 "addu %[count2], %[count2], $t2 \n\t"
|
yading@10
|
600
|
yading@10
|
601 ".set pop \n\t"
|
yading@10
|
602
|
yading@10
|
603 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
604 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
605 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
|
yading@10
|
606 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
|
yading@10
|
607 : [in_int]"r"(in_int)
|
yading@10
|
608 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
609 "memory"
|
yading@10
|
610 );
|
yading@10
|
611
|
yading@10
|
612 curidx = 13 * qc1;
|
yading@10
|
613 curidx += qc2;
|
yading@10
|
614
|
yading@10
|
615 v_codes = (p_codes[curidx] << count1) | sign1;
|
yading@10
|
616 v_bits = p_bits[curidx] + count1;
|
yading@10
|
617 put_bits(pb, v_bits, v_codes);
|
yading@10
|
618
|
yading@10
|
619 curidx = 13 * qc3;
|
yading@10
|
620 curidx += qc4;
|
yading@10
|
621
|
yading@10
|
622 v_codes = (p_codes[curidx] << count2) | sign2;
|
yading@10
|
623 v_bits = p_bits[curidx] + count2;
|
yading@10
|
624 put_bits(pb, v_bits, v_codes);
|
yading@10
|
625 }
|
yading@10
|
626 }
|
yading@10
|
627
|
yading@10
|
628 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
|
yading@10
|
629 PutBitContext *pb, const float *in,
|
yading@10
|
630 const float *scaled, int size, int scale_idx,
|
yading@10
|
631 int cb, const float lambda, const float uplim,
|
yading@10
|
632 int *bits)
|
yading@10
|
633 {
|
yading@10
|
634 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
635 int i;
|
yading@10
|
636 int qc1, qc2, qc3, qc4;
|
yading@10
|
637
|
yading@10
|
638 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
|
yading@10
|
639 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
|
yading@10
|
640 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
|
yading@10
|
641
|
yading@10
|
642 abs_pow34_v(s->scoefs, in, size);
|
yading@10
|
643 scaled = s->scoefs;
|
yading@10
|
644
|
yading@10
|
645 if (cb < 11) {
|
yading@10
|
646 for (i = 0; i < size; i += 4) {
|
yading@10
|
647 int curidx, curidx2, sign1, count1, sign2, count2;
|
yading@10
|
648 int *in_int = (int *)&in[i];
|
yading@10
|
649 uint8_t v_bits;
|
yading@10
|
650 unsigned int v_codes;
|
yading@10
|
651
|
yading@10
|
652 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
653 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
654 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
655 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
656
|
yading@10
|
657 __asm__ volatile (
|
yading@10
|
658 ".set push \n\t"
|
yading@10
|
659 ".set noreorder \n\t"
|
yading@10
|
660
|
yading@10
|
661 "ori $t4, $zero, 16 \n\t"
|
yading@10
|
662 "ori %[sign1], $zero, 0 \n\t"
|
yading@10
|
663 "ori %[sign2], $zero, 0 \n\t"
|
yading@10
|
664 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
665 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
666 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
667 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
668 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
669 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
670 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
671 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
672 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
673 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
674 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
675 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
676 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
677 "movn %[sign1], $t0, %[qc1] \n\t"
|
yading@10
|
678 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
679 "movn %[sign2], $t2, %[qc3] \n\t"
|
yading@10
|
680 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
681 "sll $t0, %[sign1], 1 \n\t"
|
yading@10
|
682 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
683 "movn %[sign1], $t0, %[qc2] \n\t"
|
yading@10
|
684 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
685 "sll $t0, %[sign2], 1 \n\t"
|
yading@10
|
686 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
687 "movn %[sign2], $t0, %[qc4] \n\t"
|
yading@10
|
688 "slt %[count1], $zero, %[qc1] \n\t"
|
yading@10
|
689 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
690 "slt %[count2], $zero, %[qc3] \n\t"
|
yading@10
|
691 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
692 "addu %[count1], %[count1], $t1 \n\t"
|
yading@10
|
693 "addu %[count2], %[count2], $t2 \n\t"
|
yading@10
|
694
|
yading@10
|
695 ".set pop \n\t"
|
yading@10
|
696
|
yading@10
|
697 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
698 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
699 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
|
yading@10
|
700 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
|
yading@10
|
701 : [in_int]"r"(in_int)
|
yading@10
|
702 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
703 "memory"
|
yading@10
|
704 );
|
yading@10
|
705
|
yading@10
|
706 curidx = 17 * qc1;
|
yading@10
|
707 curidx += qc2;
|
yading@10
|
708 curidx2 = 17 * qc3;
|
yading@10
|
709 curidx2 += qc4;
|
yading@10
|
710
|
yading@10
|
711 v_codes = (p_codes[curidx] << count1) | sign1;
|
yading@10
|
712 v_bits = p_bits[curidx] + count1;
|
yading@10
|
713 put_bits(pb, v_bits, v_codes);
|
yading@10
|
714
|
yading@10
|
715 v_codes = (p_codes[curidx2] << count2) | sign2;
|
yading@10
|
716 v_bits = p_bits[curidx2] + count2;
|
yading@10
|
717 put_bits(pb, v_bits, v_codes);
|
yading@10
|
718 }
|
yading@10
|
719 } else {
|
yading@10
|
720 for (i = 0; i < size; i += 4) {
|
yading@10
|
721 int curidx, curidx2, sign1, count1, sign2, count2;
|
yading@10
|
722 int *in_int = (int *)&in[i];
|
yading@10
|
723 uint8_t v_bits;
|
yading@10
|
724 unsigned int v_codes;
|
yading@10
|
725 int c1, c2, c3, c4;
|
yading@10
|
726
|
yading@10
|
727 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
728 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
729 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
730 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
731
|
yading@10
|
732 __asm__ volatile (
|
yading@10
|
733 ".set push \n\t"
|
yading@10
|
734 ".set noreorder \n\t"
|
yading@10
|
735
|
yading@10
|
736 "ori $t4, $zero, 16 \n\t"
|
yading@10
|
737 "ori %[sign1], $zero, 0 \n\t"
|
yading@10
|
738 "ori %[sign2], $zero, 0 \n\t"
|
yading@10
|
739 "shll_s.w %[c1], %[qc1], 18 \n\t"
|
yading@10
|
740 "shll_s.w %[c2], %[qc2], 18 \n\t"
|
yading@10
|
741 "shll_s.w %[c3], %[qc3], 18 \n\t"
|
yading@10
|
742 "shll_s.w %[c4], %[qc4], 18 \n\t"
|
yading@10
|
743 "srl %[c1], %[c1], 18 \n\t"
|
yading@10
|
744 "srl %[c2], %[c2], 18 \n\t"
|
yading@10
|
745 "srl %[c3], %[c3], 18 \n\t"
|
yading@10
|
746 "srl %[c4], %[c4], 18 \n\t"
|
yading@10
|
747 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
748 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
749 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
750 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
751 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
752 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
753 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
754 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
755 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
756 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
757 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
758 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
759 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
760 "movn %[sign1], $t0, %[qc1] \n\t"
|
yading@10
|
761 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
762 "movn %[sign2], $t2, %[qc3] \n\t"
|
yading@10
|
763 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
764 "sll $t0, %[sign1], 1 \n\t"
|
yading@10
|
765 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
766 "movn %[sign1], $t0, %[qc2] \n\t"
|
yading@10
|
767 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
768 "sll $t0, %[sign2], 1 \n\t"
|
yading@10
|
769 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
770 "movn %[sign2], $t0, %[qc4] \n\t"
|
yading@10
|
771 "slt %[count1], $zero, %[qc1] \n\t"
|
yading@10
|
772 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
773 "slt %[count2], $zero, %[qc3] \n\t"
|
yading@10
|
774 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
775 "addu %[count1], %[count1], $t1 \n\t"
|
yading@10
|
776 "addu %[count2], %[count2], $t2 \n\t"
|
yading@10
|
777
|
yading@10
|
778 ".set pop \n\t"
|
yading@10
|
779
|
yading@10
|
780 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
781 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
782 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
|
yading@10
|
783 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
|
yading@10
|
784 [c1]"=&r"(c1), [c2]"=&r"(c2),
|
yading@10
|
785 [c3]"=&r"(c3), [c4]"=&r"(c4)
|
yading@10
|
786 : [in_int]"r"(in_int)
|
yading@10
|
787 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
788 "memory"
|
yading@10
|
789 );
|
yading@10
|
790
|
yading@10
|
791 curidx = 17 * qc1;
|
yading@10
|
792 curidx += qc2;
|
yading@10
|
793
|
yading@10
|
794 curidx2 = 17 * qc3;
|
yading@10
|
795 curidx2 += qc4;
|
yading@10
|
796
|
yading@10
|
797 v_codes = (p_codes[curidx] << count1) | sign1;
|
yading@10
|
798 v_bits = p_bits[curidx] + count1;
|
yading@10
|
799 put_bits(pb, v_bits, v_codes);
|
yading@10
|
800
|
yading@10
|
801 if (p_vectors[curidx*2 ] == 64.0f) {
|
yading@10
|
802 int len = av_log2(c1);
|
yading@10
|
803 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
|
yading@10
|
804 put_bits(pb, len * 2 - 3, v_codes);
|
yading@10
|
805 }
|
yading@10
|
806 if (p_vectors[curidx*2+1] == 64.0f) {
|
yading@10
|
807 int len = av_log2(c2);
|
yading@10
|
808 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
|
yading@10
|
809 put_bits(pb, len*2-3, v_codes);
|
yading@10
|
810 }
|
yading@10
|
811
|
yading@10
|
812 v_codes = (p_codes[curidx2] << count2) | sign2;
|
yading@10
|
813 v_bits = p_bits[curidx2] + count2;
|
yading@10
|
814 put_bits(pb, v_bits, v_codes);
|
yading@10
|
815
|
yading@10
|
816 if (p_vectors[curidx2*2 ] == 64.0f) {
|
yading@10
|
817 int len = av_log2(c3);
|
yading@10
|
818 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
|
yading@10
|
819 put_bits(pb, len* 2 - 3, v_codes);
|
yading@10
|
820 }
|
yading@10
|
821 if (p_vectors[curidx2*2+1] == 64.0f) {
|
yading@10
|
822 int len = av_log2(c4);
|
yading@10
|
823 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
|
yading@10
|
824 put_bits(pb, len * 2 - 3, v_codes);
|
yading@10
|
825 }
|
yading@10
|
826 }
|
yading@10
|
827 }
|
yading@10
|
828 }
|
yading@10
|
829
|
yading@10
|
830 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
|
yading@10
|
831 PutBitContext *pb, const float *in,
|
yading@10
|
832 const float *scaled, int size, int scale_idx,
|
yading@10
|
833 int cb, const float lambda, const float uplim,
|
yading@10
|
834 int *bits) = {
|
yading@10
|
835 NULL,
|
yading@10
|
836 quantize_and_encode_band_cost_SQUAD_mips,
|
yading@10
|
837 quantize_and_encode_band_cost_SQUAD_mips,
|
yading@10
|
838 quantize_and_encode_band_cost_UQUAD_mips,
|
yading@10
|
839 quantize_and_encode_band_cost_UQUAD_mips,
|
yading@10
|
840 quantize_and_encode_band_cost_SPAIR_mips,
|
yading@10
|
841 quantize_and_encode_band_cost_SPAIR_mips,
|
yading@10
|
842 quantize_and_encode_band_cost_UPAIR7_mips,
|
yading@10
|
843 quantize_and_encode_band_cost_UPAIR7_mips,
|
yading@10
|
844 quantize_and_encode_band_cost_UPAIR12_mips,
|
yading@10
|
845 quantize_and_encode_band_cost_UPAIR12_mips,
|
yading@10
|
846 quantize_and_encode_band_cost_ESC_mips,
|
yading@10
|
847 };
|
yading@10
|
848
|
yading@10
|
/*
 * Forward all arguments to the quantize-and-encode routine registered for
 * codebook cb in quantize_and_encode_band_cost_arr.
 */
#define quantize_and_encode_band_cost(s, pb, in, scaled, size, scale_idx, cb, \
                                      lambda, uplim, bits)                    \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, \
                                          cb, lambda, uplim, bits)
|
yading@10
|
855
|
yading@10
|
856 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
|
yading@10
|
857 const float *in, int size, int scale_idx,
|
yading@10
|
858 int cb, const float lambda)
|
yading@10
|
859 {
|
yading@10
|
860 quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
|
yading@10
|
861 INFINITY, NULL);
|
yading@10
|
862 }
|
yading@10
|
863
|
yading@10
|
864 /**
|
yading@10
|
865 * Functions developed from template function and optimized for getting the number of bits
|
yading@10
|
866 */
|
yading@10
|
867 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
|
yading@10
|
868 PutBitContext *pb, const float *in,
|
yading@10
|
869 const float *scaled, int size, int scale_idx,
|
yading@10
|
870 int cb, const float lambda, const float uplim,
|
yading@10
|
871 int *bits)
|
yading@10
|
872 {
|
yading@10
|
873 return 0;
|
yading@10
|
874 }
|
yading@10
|
875
|
yading@10
|
876 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
|
yading@10
|
877 PutBitContext *pb, const float *in,
|
yading@10
|
878 const float *scaled, int size, int scale_idx,
|
yading@10
|
879 int cb, const float lambda, const float uplim,
|
yading@10
|
880 int *bits)
|
yading@10
|
881 {
|
yading@10
|
882 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
883 int i;
|
yading@10
|
884 int qc1, qc2, qc3, qc4;
|
yading@10
|
885 int curbits = 0;
|
yading@10
|
886
|
yading@10
|
887 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
888
|
yading@10
|
889 for (i = 0; i < size; i += 4) {
|
yading@10
|
890 int curidx;
|
yading@10
|
891 int *in_int = (int *)&in[i];
|
yading@10
|
892
|
yading@10
|
893 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
894 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
895 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
896 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
897
|
yading@10
|
898 __asm__ volatile (
|
yading@10
|
899 ".set push \n\t"
|
yading@10
|
900 ".set noreorder \n\t"
|
yading@10
|
901
|
yading@10
|
902 "slt %[qc1], $zero, %[qc1] \n\t"
|
yading@10
|
903 "slt %[qc2], $zero, %[qc2] \n\t"
|
yading@10
|
904 "slt %[qc3], $zero, %[qc3] \n\t"
|
yading@10
|
905 "slt %[qc4], $zero, %[qc4] \n\t"
|
yading@10
|
906 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
907 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
908 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
909 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
910 "srl $t0, $t0, 31 \n\t"
|
yading@10
|
911 "srl $t1, $t1, 31 \n\t"
|
yading@10
|
912 "srl $t2, $t2, 31 \n\t"
|
yading@10
|
913 "srl $t3, $t3, 31 \n\t"
|
yading@10
|
914 "subu $t4, $zero, %[qc1] \n\t"
|
yading@10
|
915 "subu $t5, $zero, %[qc2] \n\t"
|
yading@10
|
916 "subu $t6, $zero, %[qc3] \n\t"
|
yading@10
|
917 "subu $t7, $zero, %[qc4] \n\t"
|
yading@10
|
918 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
919 "movn %[qc2], $t5, $t1 \n\t"
|
yading@10
|
920 "movn %[qc3], $t6, $t2 \n\t"
|
yading@10
|
921 "movn %[qc4], $t7, $t3 \n\t"
|
yading@10
|
922
|
yading@10
|
923 ".set pop \n\t"
|
yading@10
|
924
|
yading@10
|
925 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
926 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
927 : [in_int]"r"(in_int)
|
yading@10
|
928 : "t0", "t1", "t2", "t3",
|
yading@10
|
929 "t4", "t5", "t6", "t7",
|
yading@10
|
930 "memory"
|
yading@10
|
931 );
|
yading@10
|
932
|
yading@10
|
933 curidx = qc1;
|
yading@10
|
934 curidx *= 3;
|
yading@10
|
935 curidx += qc2;
|
yading@10
|
936 curidx *= 3;
|
yading@10
|
937 curidx += qc3;
|
yading@10
|
938 curidx *= 3;
|
yading@10
|
939 curidx += qc4;
|
yading@10
|
940 curidx += 40;
|
yading@10
|
941
|
yading@10
|
942 curbits += p_bits[curidx];
|
yading@10
|
943 }
|
yading@10
|
944 return curbits;
|
yading@10
|
945 }
|
yading@10
|
946
|
yading@10
|
947 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
|
yading@10
|
948 PutBitContext *pb, const float *in,
|
yading@10
|
949 const float *scaled, int size, int scale_idx,
|
yading@10
|
950 int cb, const float lambda, const float uplim,
|
yading@10
|
951 int *bits)
|
yading@10
|
952 {
|
yading@10
|
953 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
954 int i;
|
yading@10
|
955 int curbits = 0;
|
yading@10
|
956 int qc1, qc2, qc3, qc4;
|
yading@10
|
957
|
yading@10
|
958 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
959
|
yading@10
|
960 for (i = 0; i < size; i += 4) {
|
yading@10
|
961 int curidx;
|
yading@10
|
962
|
yading@10
|
963 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
964 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
965 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
966 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
967
|
yading@10
|
968 __asm__ volatile (
|
yading@10
|
969 ".set push \n\t"
|
yading@10
|
970 ".set noreorder \n\t"
|
yading@10
|
971
|
yading@10
|
972 "ori $t4, $zero, 2 \n\t"
|
yading@10
|
973 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
974 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
975 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
976 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
977 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
978 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
979 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
980 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
981
|
yading@10
|
982 ".set pop \n\t"
|
yading@10
|
983
|
yading@10
|
984 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
985 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
986 :
|
yading@10
|
987 : "t0", "t1", "t2", "t3", "t4"
|
yading@10
|
988 );
|
yading@10
|
989
|
yading@10
|
990 curidx = qc1;
|
yading@10
|
991 curidx *= 3;
|
yading@10
|
992 curidx += qc2;
|
yading@10
|
993 curidx *= 3;
|
yading@10
|
994 curidx += qc3;
|
yading@10
|
995 curidx *= 3;
|
yading@10
|
996 curidx += qc4;
|
yading@10
|
997
|
yading@10
|
998 curbits += p_bits[curidx];
|
yading@10
|
999 curbits += uquad_sign_bits[curidx];
|
yading@10
|
1000 }
|
yading@10
|
1001 return curbits;
|
yading@10
|
1002 }
|
yading@10
|
1003
|
yading@10
|
1004 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
|
yading@10
|
1005 PutBitContext *pb, const float *in,
|
yading@10
|
1006 const float *scaled, int size, int scale_idx,
|
yading@10
|
1007 int cb, const float lambda, const float uplim,
|
yading@10
|
1008 int *bits)
|
yading@10
|
1009 {
|
yading@10
|
1010 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1011 int i;
|
yading@10
|
1012 int qc1, qc2, qc3, qc4;
|
yading@10
|
1013 int curbits = 0;
|
yading@10
|
1014
|
yading@10
|
1015 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1016
|
yading@10
|
1017 for (i = 0; i < size; i += 4) {
|
yading@10
|
1018 int curidx, curidx2;
|
yading@10
|
1019 int *in_int = (int *)&in[i];
|
yading@10
|
1020
|
yading@10
|
1021 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1022 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1023 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1024 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1025
|
yading@10
|
1026 __asm__ volatile (
|
yading@10
|
1027 ".set push \n\t"
|
yading@10
|
1028 ".set noreorder \n\t"
|
yading@10
|
1029
|
yading@10
|
1030 "ori $t4, $zero, 4 \n\t"
|
yading@10
|
1031 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1032 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1033 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1034 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1035 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1036 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1037 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1038 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1039 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
1040 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
1041 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
1042 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
1043 "srl $t0, $t0, 31 \n\t"
|
yading@10
|
1044 "srl $t1, $t1, 31 \n\t"
|
yading@10
|
1045 "srl $t2, $t2, 31 \n\t"
|
yading@10
|
1046 "srl $t3, $t3, 31 \n\t"
|
yading@10
|
1047 "subu $t4, $zero, %[qc1] \n\t"
|
yading@10
|
1048 "subu $t5, $zero, %[qc2] \n\t"
|
yading@10
|
1049 "subu $t6, $zero, %[qc3] \n\t"
|
yading@10
|
1050 "subu $t7, $zero, %[qc4] \n\t"
|
yading@10
|
1051 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1052 "movn %[qc2], $t5, $t1 \n\t"
|
yading@10
|
1053 "movn %[qc3], $t6, $t2 \n\t"
|
yading@10
|
1054 "movn %[qc4], $t7, $t3 \n\t"
|
yading@10
|
1055
|
yading@10
|
1056 ".set pop \n\t"
|
yading@10
|
1057
|
yading@10
|
1058 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1059 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
1060 : [in_int]"r"(in_int)
|
yading@10
|
1061 : "t0", "t1", "t2", "t3",
|
yading@10
|
1062 "t4", "t5", "t6", "t7",
|
yading@10
|
1063 "memory"
|
yading@10
|
1064 );
|
yading@10
|
1065
|
yading@10
|
1066 curidx = 9 * qc1;
|
yading@10
|
1067 curidx += qc2 + 40;
|
yading@10
|
1068
|
yading@10
|
1069 curidx2 = 9 * qc3;
|
yading@10
|
1070 curidx2 += qc4 + 40;
|
yading@10
|
1071
|
yading@10
|
1072 curbits += p_bits[curidx] + p_bits[curidx2];
|
yading@10
|
1073 }
|
yading@10
|
1074 return curbits;
|
yading@10
|
1075 }
|
yading@10
|
1076
|
yading@10
|
1077 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
|
yading@10
|
1078 PutBitContext *pb, const float *in,
|
yading@10
|
1079 const float *scaled, int size, int scale_idx,
|
yading@10
|
1080 int cb, const float lambda, const float uplim,
|
yading@10
|
1081 int *bits)
|
yading@10
|
1082 {
|
yading@10
|
1083 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1084 int i;
|
yading@10
|
1085 int qc1, qc2, qc3, qc4;
|
yading@10
|
1086 int curbits = 0;
|
yading@10
|
1087
|
yading@10
|
1088 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1089
|
yading@10
|
1090 for (i = 0; i < size; i += 4) {
|
yading@10
|
1091 int curidx, curidx2;
|
yading@10
|
1092
|
yading@10
|
1093 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1094 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1095 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1096 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1097
|
yading@10
|
1098 __asm__ volatile (
|
yading@10
|
1099 ".set push \n\t"
|
yading@10
|
1100 ".set noreorder \n\t"
|
yading@10
|
1101
|
yading@10
|
1102 "ori $t4, $zero, 7 \n\t"
|
yading@10
|
1103 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1104 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1105 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1106 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1107 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1108 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1109 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1110 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1111
|
yading@10
|
1112 ".set pop \n\t"
|
yading@10
|
1113
|
yading@10
|
1114 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1115 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
1116 :
|
yading@10
|
1117 : "t0", "t1", "t2", "t3", "t4"
|
yading@10
|
1118 );
|
yading@10
|
1119
|
yading@10
|
1120 curidx = 8 * qc1;
|
yading@10
|
1121 curidx += qc2;
|
yading@10
|
1122
|
yading@10
|
1123 curidx2 = 8 * qc3;
|
yading@10
|
1124 curidx2 += qc4;
|
yading@10
|
1125
|
yading@10
|
1126 curbits += p_bits[curidx] +
|
yading@10
|
1127 upair7_sign_bits[curidx] +
|
yading@10
|
1128 p_bits[curidx2] +
|
yading@10
|
1129 upair7_sign_bits[curidx2];
|
yading@10
|
1130 }
|
yading@10
|
1131 return curbits;
|
yading@10
|
1132 }
|
yading@10
|
1133
|
yading@10
|
1134 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
|
yading@10
|
1135 PutBitContext *pb, const float *in,
|
yading@10
|
1136 const float *scaled, int size, int scale_idx,
|
yading@10
|
1137 int cb, const float lambda, const float uplim,
|
yading@10
|
1138 int *bits)
|
yading@10
|
1139 {
|
yading@10
|
1140 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1141 int i;
|
yading@10
|
1142 int qc1, qc2, qc3, qc4;
|
yading@10
|
1143 int curbits = 0;
|
yading@10
|
1144
|
yading@10
|
1145 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1146
|
yading@10
|
1147 for (i = 0; i < size; i += 4) {
|
yading@10
|
1148 int curidx, curidx2;
|
yading@10
|
1149
|
yading@10
|
1150 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1151 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1152 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1153 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1154
|
yading@10
|
1155 __asm__ volatile (
|
yading@10
|
1156 ".set push \n\t"
|
yading@10
|
1157 ".set noreorder \n\t"
|
yading@10
|
1158
|
yading@10
|
1159 "ori $t4, $zero, 12 \n\t"
|
yading@10
|
1160 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1161 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1162 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1163 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1164 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1165 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1166 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1167 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1168
|
yading@10
|
1169 ".set pop \n\t"
|
yading@10
|
1170
|
yading@10
|
1171 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1172 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
1173 :
|
yading@10
|
1174 : "t0", "t1", "t2", "t3", "t4"
|
yading@10
|
1175 );
|
yading@10
|
1176
|
yading@10
|
1177 curidx = 13 * qc1;
|
yading@10
|
1178 curidx += qc2;
|
yading@10
|
1179
|
yading@10
|
1180 curidx2 = 13 * qc3;
|
yading@10
|
1181 curidx2 += qc4;
|
yading@10
|
1182
|
yading@10
|
1183 curbits += p_bits[curidx] +
|
yading@10
|
1184 p_bits[curidx2] +
|
yading@10
|
1185 upair12_sign_bits[curidx] +
|
yading@10
|
1186 upair12_sign_bits[curidx2];
|
yading@10
|
1187 }
|
yading@10
|
1188 return curbits;
|
yading@10
|
1189 }
|
yading@10
|
1190
|
yading@10
|
1191 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
|
yading@10
|
1192 PutBitContext *pb, const float *in,
|
yading@10
|
1193 const float *scaled, int size, int scale_idx,
|
yading@10
|
1194 int cb, const float lambda, const float uplim,
|
yading@10
|
1195 int *bits)
|
yading@10
|
1196 {
|
yading@10
|
1197 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1198 int i;
|
yading@10
|
1199 int qc1, qc2, qc3, qc4;
|
yading@10
|
1200 int curbits = 0;
|
yading@10
|
1201
|
yading@10
|
1202 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1203
|
yading@10
|
1204 for (i = 0; i < size; i += 4) {
|
yading@10
|
1205 int curidx, curidx2;
|
yading@10
|
1206 int cond0, cond1, cond2, cond3;
|
yading@10
|
1207 int c1, c2, c3, c4;
|
yading@10
|
1208
|
yading@10
|
1209 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1210 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1211 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1212 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1213
|
yading@10
|
1214 __asm__ volatile (
|
yading@10
|
1215 ".set push \n\t"
|
yading@10
|
1216 ".set noreorder \n\t"
|
yading@10
|
1217
|
yading@10
|
1218 "ori $t4, $zero, 15 \n\t"
|
yading@10
|
1219 "ori $t5, $zero, 16 \n\t"
|
yading@10
|
1220 "shll_s.w %[c1], %[qc1], 18 \n\t"
|
yading@10
|
1221 "shll_s.w %[c2], %[qc2], 18 \n\t"
|
yading@10
|
1222 "shll_s.w %[c3], %[qc3], 18 \n\t"
|
yading@10
|
1223 "shll_s.w %[c4], %[qc4], 18 \n\t"
|
yading@10
|
1224 "srl %[c1], %[c1], 18 \n\t"
|
yading@10
|
1225 "srl %[c2], %[c2], 18 \n\t"
|
yading@10
|
1226 "srl %[c3], %[c3], 18 \n\t"
|
yading@10
|
1227 "srl %[c4], %[c4], 18 \n\t"
|
yading@10
|
1228 "slt %[cond0], $t4, %[qc1] \n\t"
|
yading@10
|
1229 "slt %[cond1], $t4, %[qc2] \n\t"
|
yading@10
|
1230 "slt %[cond2], $t4, %[qc3] \n\t"
|
yading@10
|
1231 "slt %[cond3], $t4, %[qc4] \n\t"
|
yading@10
|
1232 "movn %[qc1], $t5, %[cond0] \n\t"
|
yading@10
|
1233 "movn %[qc2], $t5, %[cond1] \n\t"
|
yading@10
|
1234 "movn %[qc3], $t5, %[cond2] \n\t"
|
yading@10
|
1235 "movn %[qc4], $t5, %[cond3] \n\t"
|
yading@10
|
1236 "ori $t5, $zero, 31 \n\t"
|
yading@10
|
1237 "clz %[c1], %[c1] \n\t"
|
yading@10
|
1238 "clz %[c2], %[c2] \n\t"
|
yading@10
|
1239 "clz %[c3], %[c3] \n\t"
|
yading@10
|
1240 "clz %[c4], %[c4] \n\t"
|
yading@10
|
1241 "subu %[c1], $t5, %[c1] \n\t"
|
yading@10
|
1242 "subu %[c2], $t5, %[c2] \n\t"
|
yading@10
|
1243 "subu %[c3], $t5, %[c3] \n\t"
|
yading@10
|
1244 "subu %[c4], $t5, %[c4] \n\t"
|
yading@10
|
1245 "sll %[c1], %[c1], 1 \n\t"
|
yading@10
|
1246 "sll %[c2], %[c2], 1 \n\t"
|
yading@10
|
1247 "sll %[c3], %[c3], 1 \n\t"
|
yading@10
|
1248 "sll %[c4], %[c4], 1 \n\t"
|
yading@10
|
1249 "addiu %[c1], %[c1], -3 \n\t"
|
yading@10
|
1250 "addiu %[c2], %[c2], -3 \n\t"
|
yading@10
|
1251 "addiu %[c3], %[c3], -3 \n\t"
|
yading@10
|
1252 "addiu %[c4], %[c4], -3 \n\t"
|
yading@10
|
1253 "subu %[cond0], $zero, %[cond0] \n\t"
|
yading@10
|
1254 "subu %[cond1], $zero, %[cond1] \n\t"
|
yading@10
|
1255 "subu %[cond2], $zero, %[cond2] \n\t"
|
yading@10
|
1256 "subu %[cond3], $zero, %[cond3] \n\t"
|
yading@10
|
1257 "and %[c1], %[c1], %[cond0] \n\t"
|
yading@10
|
1258 "and %[c2], %[c2], %[cond1] \n\t"
|
yading@10
|
1259 "and %[c3], %[c3], %[cond2] \n\t"
|
yading@10
|
1260 "and %[c4], %[c4], %[cond3] \n\t"
|
yading@10
|
1261
|
yading@10
|
1262 ".set pop \n\t"
|
yading@10
|
1263
|
yading@10
|
1264 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1265 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
1266 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
|
yading@10
|
1267 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
|
yading@10
|
1268 [c1]"=&r"(c1), [c2]"=&r"(c2),
|
yading@10
|
1269 [c3]"=&r"(c3), [c4]"=&r"(c4)
|
yading@10
|
1270 :
|
yading@10
|
1271 : "t4", "t5"
|
yading@10
|
1272 );
|
yading@10
|
1273
|
yading@10
|
1274 curidx = 17 * qc1;
|
yading@10
|
1275 curidx += qc2;
|
yading@10
|
1276
|
yading@10
|
1277 curidx2 = 17 * qc3;
|
yading@10
|
1278 curidx2 += qc4;
|
yading@10
|
1279
|
yading@10
|
1280 curbits += p_bits[curidx];
|
yading@10
|
1281 curbits += esc_sign_bits[curidx];
|
yading@10
|
1282 curbits += p_bits[curidx2];
|
yading@10
|
1283 curbits += esc_sign_bits[curidx2];
|
yading@10
|
1284
|
yading@10
|
1285 curbits += c1;
|
yading@10
|
1286 curbits += c2;
|
yading@10
|
1287 curbits += c3;
|
yading@10
|
1288 curbits += c4;
|
yading@10
|
1289 }
|
yading@10
|
1290 return curbits;
|
yading@10
|
1291 }
|
yading@10
|
1292
|
yading@10
|
1293 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
|
yading@10
|
1294 PutBitContext *pb, const float *in,
|
yading@10
|
1295 const float *scaled, int size, int scale_idx,
|
yading@10
|
1296 int cb, const float lambda, const float uplim,
|
yading@10
|
1297 int *bits) = {
|
yading@10
|
1298 get_band_numbits_ZERO_mips,
|
yading@10
|
1299 get_band_numbits_SQUAD_mips,
|
yading@10
|
1300 get_band_numbits_SQUAD_mips,
|
yading@10
|
1301 get_band_numbits_UQUAD_mips,
|
yading@10
|
1302 get_band_numbits_UQUAD_mips,
|
yading@10
|
1303 get_band_numbits_SPAIR_mips,
|
yading@10
|
1304 get_band_numbits_SPAIR_mips,
|
yading@10
|
1305 get_band_numbits_UPAIR7_mips,
|
yading@10
|
1306 get_band_numbits_UPAIR7_mips,
|
yading@10
|
1307 get_band_numbits_UPAIR12_mips,
|
yading@10
|
1308 get_band_numbits_UPAIR12_mips,
|
yading@10
|
1309 get_band_numbits_ESC_mips,
|
yading@10
|
1310 };
|
yading@10
|
1311
|
yading@10
|
/*
 * Forward all arguments to the bit-counting routine registered for
 * codebook cb in get_band_numbits_arr.
 */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx, cb, \
                         lambda, uplim, bits)                    \
    get_band_numbits_arr[cb](s, pb, in, scaled, size, scale_idx, \
                             cb, lambda, uplim, bits)
|
yading@10
|
1318
|
yading@10
|
1319 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
|
yading@10
|
1320 const float *scaled, int size, int scale_idx,
|
yading@10
|
1321 int cb, const float lambda, const float uplim,
|
yading@10
|
1322 int *bits)
|
yading@10
|
1323 {
|
yading@10
|
1324 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
|
yading@10
|
1325 }
|
yading@10
|
1326
|
yading@10
|
1327 /**
|
yading@10
|
1328 * Functions developed from template function and optimized for getting the band cost
|
yading@10
|
1329 */
|
yading@10
|
1330 #if HAVE_MIPSFPU
|
yading@10
|
1331 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
|
yading@10
|
1332 PutBitContext *pb, const float *in,
|
yading@10
|
1333 const float *scaled, int size, int scale_idx,
|
yading@10
|
1334 int cb, const float lambda, const float uplim,
|
yading@10
|
1335 int *bits)
|
yading@10
|
1336 {
|
yading@10
|
1337 int i;
|
yading@10
|
1338 float cost = 0;
|
yading@10
|
1339
|
yading@10
|
1340 for (i = 0; i < size; i += 4) {
|
yading@10
|
1341 cost += in[i ] * in[i ];
|
yading@10
|
1342 cost += in[i+1] * in[i+1];
|
yading@10
|
1343 cost += in[i+2] * in[i+2];
|
yading@10
|
1344 cost += in[i+3] * in[i+3];
|
yading@10
|
1345 }
|
yading@10
|
1346 if (bits)
|
yading@10
|
1347 *bits = 0;
|
yading@10
|
1348 return cost * lambda;
|
yading@10
|
1349 }
|
yading@10
|
1350
|
yading@10
|
1351 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
|
yading@10
|
1352 PutBitContext *pb, const float *in,
|
yading@10
|
1353 const float *scaled, int size, int scale_idx,
|
yading@10
|
1354 int cb, const float lambda, const float uplim,
|
yading@10
|
1355 int *bits)
|
yading@10
|
1356 {
|
yading@10
|
1357 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1358 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
|
yading@10
|
1359 int i;
|
yading@10
|
1360 float cost = 0;
|
yading@10
|
1361 int qc1, qc2, qc3, qc4;
|
yading@10
|
1362 int curbits = 0;
|
yading@10
|
1363
|
yading@10
|
1364 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1365 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
|
yading@10
|
1366
|
yading@10
|
1367 for (i = 0; i < size; i += 4) {
|
yading@10
|
1368 const float *vec;
|
yading@10
|
1369 int curidx;
|
yading@10
|
1370 int *in_int = (int *)&in[i];
|
yading@10
|
1371 float *in_pos = (float *)&in[i];
|
yading@10
|
1372 float di0, di1, di2, di3;
|
yading@10
|
1373
|
yading@10
|
1374 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1375 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1376 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1377 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1378
|
yading@10
|
1379 __asm__ volatile (
|
yading@10
|
1380 ".set push \n\t"
|
yading@10
|
1381 ".set noreorder \n\t"
|
yading@10
|
1382
|
yading@10
|
1383 "slt %[qc1], $zero, %[qc1] \n\t"
|
yading@10
|
1384 "slt %[qc2], $zero, %[qc2] \n\t"
|
yading@10
|
1385 "slt %[qc3], $zero, %[qc3] \n\t"
|
yading@10
|
1386 "slt %[qc4], $zero, %[qc4] \n\t"
|
yading@10
|
1387 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
1388 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
1389 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
1390 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
1391 "srl $t0, $t0, 31 \n\t"
|
yading@10
|
1392 "srl $t1, $t1, 31 \n\t"
|
yading@10
|
1393 "srl $t2, $t2, 31 \n\t"
|
yading@10
|
1394 "srl $t3, $t3, 31 \n\t"
|
yading@10
|
1395 "subu $t4, $zero, %[qc1] \n\t"
|
yading@10
|
1396 "subu $t5, $zero, %[qc2] \n\t"
|
yading@10
|
1397 "subu $t6, $zero, %[qc3] \n\t"
|
yading@10
|
1398 "subu $t7, $zero, %[qc4] \n\t"
|
yading@10
|
1399 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1400 "movn %[qc2], $t5, $t1 \n\t"
|
yading@10
|
1401 "movn %[qc3], $t6, $t2 \n\t"
|
yading@10
|
1402 "movn %[qc4], $t7, $t3 \n\t"
|
yading@10
|
1403
|
yading@10
|
1404 ".set pop \n\t"
|
yading@10
|
1405
|
yading@10
|
1406 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1407 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
1408 : [in_int]"r"(in_int)
|
yading@10
|
1409 : "t0", "t1", "t2", "t3",
|
yading@10
|
1410 "t4", "t5", "t6", "t7",
|
yading@10
|
1411 "memory"
|
yading@10
|
1412 );
|
yading@10
|
1413
|
yading@10
|
1414 curidx = qc1;
|
yading@10
|
1415 curidx *= 3;
|
yading@10
|
1416 curidx += qc2;
|
yading@10
|
1417 curidx *= 3;
|
yading@10
|
1418 curidx += qc3;
|
yading@10
|
1419 curidx *= 3;
|
yading@10
|
1420 curidx += qc4;
|
yading@10
|
1421 curidx += 40;
|
yading@10
|
1422
|
yading@10
|
1423 curbits += p_bits[curidx];
|
yading@10
|
1424 vec = &p_codes[curidx*4];
|
yading@10
|
1425
|
yading@10
|
1426 __asm__ volatile (
|
yading@10
|
1427 ".set push \n\t"
|
yading@10
|
1428 ".set noreorder \n\t"
|
yading@10
|
1429
|
yading@10
|
1430 "lwc1 $f0, 0(%[in_pos]) \n\t"
|
yading@10
|
1431 "lwc1 $f1, 0(%[vec]) \n\t"
|
yading@10
|
1432 "lwc1 $f2, 4(%[in_pos]) \n\t"
|
yading@10
|
1433 "lwc1 $f3, 4(%[vec]) \n\t"
|
yading@10
|
1434 "lwc1 $f4, 8(%[in_pos]) \n\t"
|
yading@10
|
1435 "lwc1 $f5, 8(%[vec]) \n\t"
|
yading@10
|
1436 "lwc1 $f6, 12(%[in_pos]) \n\t"
|
yading@10
|
1437 "lwc1 $f7, 12(%[vec]) \n\t"
|
yading@10
|
1438 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
|
yading@10
|
1439 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
|
yading@10
|
1440 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
|
yading@10
|
1441 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
|
yading@10
|
1442
|
yading@10
|
1443 ".set pop \n\t"
|
yading@10
|
1444
|
yading@10
|
1445 : [di0]"=&f"(di0), [di1]"=&f"(di1),
|
yading@10
|
1446 [di2]"=&f"(di2), [di3]"=&f"(di3)
|
yading@10
|
1447 : [in_pos]"r"(in_pos), [vec]"r"(vec),
|
yading@10
|
1448 [IQ]"f"(IQ)
|
yading@10
|
1449 : "$f0", "$f1", "$f2", "$f3",
|
yading@10
|
1450 "$f4", "$f5", "$f6", "$f7",
|
yading@10
|
1451 "memory"
|
yading@10
|
1452 );
|
yading@10
|
1453
|
yading@10
|
1454 cost += di0 * di0 + di1 * di1
|
yading@10
|
1455 + di2 * di2 + di3 * di3;
|
yading@10
|
1456 }
|
yading@10
|
1457
|
yading@10
|
1458 if (bits)
|
yading@10
|
1459 *bits = curbits;
|
yading@10
|
1460 return cost * lambda + curbits;
|
yading@10
|
1461 }
|
yading@10
|
1462
|
yading@10
|
1463 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
|
yading@10
|
1464 PutBitContext *pb, const float *in,
|
yading@10
|
1465 const float *scaled, int size, int scale_idx,
|
yading@10
|
1466 int cb, const float lambda, const float uplim,
|
yading@10
|
1467 int *bits)
|
yading@10
|
1468 {
|
yading@10
|
1469 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1470 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
|
yading@10
|
1471 int i;
|
yading@10
|
1472 float cost = 0;
|
yading@10
|
1473 int curbits = 0;
|
yading@10
|
1474 int qc1, qc2, qc3, qc4;
|
yading@10
|
1475
|
yading@10
|
1476 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1477 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
|
yading@10
|
1478
|
yading@10
|
1479 for (i = 0; i < size; i += 4) {
|
yading@10
|
1480 const float *vec;
|
yading@10
|
1481 int curidx;
|
yading@10
|
1482 float *in_pos = (float *)&in[i];
|
yading@10
|
1483 float di0, di1, di2, di3;
|
yading@10
|
1484
|
yading@10
|
1485 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1486 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1487 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1488 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1489
|
yading@10
|
1490 __asm__ volatile (
|
yading@10
|
1491 ".set push \n\t"
|
yading@10
|
1492 ".set noreorder \n\t"
|
yading@10
|
1493
|
yading@10
|
1494 "ori $t4, $zero, 2 \n\t"
|
yading@10
|
1495 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1496 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1497 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1498 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1499 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1500 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1501 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1502 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1503
|
yading@10
|
1504 ".set pop \n\t"
|
yading@10
|
1505
|
yading@10
|
1506 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1507 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
1508 :
|
yading@10
|
1509 : "t0", "t1", "t2", "t3", "t4"
|
yading@10
|
1510 );
|
yading@10
|
1511
|
yading@10
|
1512 curidx = qc1;
|
yading@10
|
1513 curidx *= 3;
|
yading@10
|
1514 curidx += qc2;
|
yading@10
|
1515 curidx *= 3;
|
yading@10
|
1516 curidx += qc3;
|
yading@10
|
1517 curidx *= 3;
|
yading@10
|
1518 curidx += qc4;
|
yading@10
|
1519
|
yading@10
|
1520 curbits += p_bits[curidx];
|
yading@10
|
1521 curbits += uquad_sign_bits[curidx];
|
yading@10
|
1522 vec = &p_codes[curidx*4];
|
yading@10
|
1523
|
yading@10
|
1524 __asm__ volatile (
|
yading@10
|
1525 ".set push \n\t"
|
yading@10
|
1526 ".set noreorder \n\t"
|
yading@10
|
1527
|
yading@10
|
1528 "lwc1 %[di0], 0(%[in_pos]) \n\t"
|
yading@10
|
1529 "lwc1 %[di1], 4(%[in_pos]) \n\t"
|
yading@10
|
1530 "lwc1 %[di2], 8(%[in_pos]) \n\t"
|
yading@10
|
1531 "lwc1 %[di3], 12(%[in_pos]) \n\t"
|
yading@10
|
1532 "abs.s %[di0], %[di0] \n\t"
|
yading@10
|
1533 "abs.s %[di1], %[di1] \n\t"
|
yading@10
|
1534 "abs.s %[di2], %[di2] \n\t"
|
yading@10
|
1535 "abs.s %[di3], %[di3] \n\t"
|
yading@10
|
1536 "lwc1 $f0, 0(%[vec]) \n\t"
|
yading@10
|
1537 "lwc1 $f1, 4(%[vec]) \n\t"
|
yading@10
|
1538 "lwc1 $f2, 8(%[vec]) \n\t"
|
yading@10
|
1539 "lwc1 $f3, 12(%[vec]) \n\t"
|
yading@10
|
1540 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
|
yading@10
|
1541 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
|
yading@10
|
1542 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
|
yading@10
|
1543 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
|
yading@10
|
1544
|
yading@10
|
1545 ".set pop \n\t"
|
yading@10
|
1546
|
yading@10
|
1547 : [di0]"=&f"(di0), [di1]"=&f"(di1),
|
yading@10
|
1548 [di2]"=&f"(di2), [di3]"=&f"(di3)
|
yading@10
|
1549 : [in_pos]"r"(in_pos), [vec]"r"(vec),
|
yading@10
|
1550 [IQ]"f"(IQ)
|
yading@10
|
1551 : "$f0", "$f1", "$f2", "$f3",
|
yading@10
|
1552 "memory"
|
yading@10
|
1553 );
|
yading@10
|
1554
|
yading@10
|
1555 cost += di0 * di0 + di1 * di1
|
yading@10
|
1556 + di2 * di2 + di3 * di3;
|
yading@10
|
1557 }
|
yading@10
|
1558
|
yading@10
|
1559 if (bits)
|
yading@10
|
1560 *bits = curbits;
|
yading@10
|
1561 return cost * lambda + curbits;
|
yading@10
|
1562 }
|
yading@10
|
1563
|
yading@10
|
1564 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
|
yading@10
|
1565 PutBitContext *pb, const float *in,
|
yading@10
|
1566 const float *scaled, int size, int scale_idx,
|
yading@10
|
1567 int cb, const float lambda, const float uplim,
|
yading@10
|
1568 int *bits)
|
yading@10
|
1569 {
|
yading@10
|
1570 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1571 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
|
yading@10
|
1572 int i;
|
yading@10
|
1573 float cost = 0;
|
yading@10
|
1574 int qc1, qc2, qc3, qc4;
|
yading@10
|
1575 int curbits = 0;
|
yading@10
|
1576
|
yading@10
|
1577 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1578 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
|
yading@10
|
1579
|
yading@10
|
1580 for (i = 0; i < size; i += 4) {
|
yading@10
|
1581 const float *vec, *vec2;
|
yading@10
|
1582 int curidx, curidx2;
|
yading@10
|
1583 int *in_int = (int *)&in[i];
|
yading@10
|
1584 float *in_pos = (float *)&in[i];
|
yading@10
|
1585 float di0, di1, di2, di3;
|
yading@10
|
1586
|
yading@10
|
1587 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1588 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1589 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1590 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1591
|
yading@10
|
1592 __asm__ volatile (
|
yading@10
|
1593 ".set push \n\t"
|
yading@10
|
1594 ".set noreorder \n\t"
|
yading@10
|
1595
|
yading@10
|
1596 "ori $t4, $zero, 4 \n\t"
|
yading@10
|
1597 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1598 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1599 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1600 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1601 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1602 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1603 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1604 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1605 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
1606 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
1607 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
1608 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
1609 "srl $t0, $t0, 31 \n\t"
|
yading@10
|
1610 "srl $t1, $t1, 31 \n\t"
|
yading@10
|
1611 "srl $t2, $t2, 31 \n\t"
|
yading@10
|
1612 "srl $t3, $t3, 31 \n\t"
|
yading@10
|
1613 "subu $t4, $zero, %[qc1] \n\t"
|
yading@10
|
1614 "subu $t5, $zero, %[qc2] \n\t"
|
yading@10
|
1615 "subu $t6, $zero, %[qc3] \n\t"
|
yading@10
|
1616 "subu $t7, $zero, %[qc4] \n\t"
|
yading@10
|
1617 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1618 "movn %[qc2], $t5, $t1 \n\t"
|
yading@10
|
1619 "movn %[qc3], $t6, $t2 \n\t"
|
yading@10
|
1620 "movn %[qc4], $t7, $t3 \n\t"
|
yading@10
|
1621
|
yading@10
|
1622 ".set pop \n\t"
|
yading@10
|
1623
|
yading@10
|
1624 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1625 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
|
yading@10
|
1626 : [in_int]"r"(in_int)
|
yading@10
|
1627 : "t0", "t1", "t2", "t3",
|
yading@10
|
1628 "t4", "t5", "t6", "t7",
|
yading@10
|
1629 "memory"
|
yading@10
|
1630 );
|
yading@10
|
1631
|
yading@10
|
1632 curidx = 9 * qc1;
|
yading@10
|
1633 curidx += qc2 + 40;
|
yading@10
|
1634
|
yading@10
|
1635 curidx2 = 9 * qc3;
|
yading@10
|
1636 curidx2 += qc4 + 40;
|
yading@10
|
1637
|
yading@10
|
1638 curbits += p_bits[curidx];
|
yading@10
|
1639 curbits += p_bits[curidx2];
|
yading@10
|
1640
|
yading@10
|
1641 vec = &p_codes[curidx*2];
|
yading@10
|
1642 vec2 = &p_codes[curidx2*2];
|
yading@10
|
1643
|
yading@10
|
1644 __asm__ volatile (
|
yading@10
|
1645 ".set push \n\t"
|
yading@10
|
1646 ".set noreorder \n\t"
|
yading@10
|
1647
|
yading@10
|
1648 "lwc1 $f0, 0(%[in_pos]) \n\t"
|
yading@10
|
1649 "lwc1 $f1, 0(%[vec]) \n\t"
|
yading@10
|
1650 "lwc1 $f2, 4(%[in_pos]) \n\t"
|
yading@10
|
1651 "lwc1 $f3, 4(%[vec]) \n\t"
|
yading@10
|
1652 "lwc1 $f4, 8(%[in_pos]) \n\t"
|
yading@10
|
1653 "lwc1 $f5, 0(%[vec2]) \n\t"
|
yading@10
|
1654 "lwc1 $f6, 12(%[in_pos]) \n\t"
|
yading@10
|
1655 "lwc1 $f7, 4(%[vec2]) \n\t"
|
yading@10
|
1656 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
|
yading@10
|
1657 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
|
yading@10
|
1658 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
|
yading@10
|
1659 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
|
yading@10
|
1660
|
yading@10
|
1661 ".set pop \n\t"
|
yading@10
|
1662
|
yading@10
|
1663 : [di0]"=&f"(di0), [di1]"=&f"(di1),
|
yading@10
|
1664 [di2]"=&f"(di2), [di3]"=&f"(di3)
|
yading@10
|
1665 : [in_pos]"r"(in_pos), [vec]"r"(vec),
|
yading@10
|
1666 [vec2]"r"(vec2), [IQ]"f"(IQ)
|
yading@10
|
1667 : "$f0", "$f1", "$f2", "$f3",
|
yading@10
|
1668 "$f4", "$f5", "$f6", "$f7",
|
yading@10
|
1669 "memory"
|
yading@10
|
1670 );
|
yading@10
|
1671
|
yading@10
|
1672 cost += di0 * di0 + di1 * di1
|
yading@10
|
1673 + di2 * di2 + di3 * di3;
|
yading@10
|
1674 }
|
yading@10
|
1675
|
yading@10
|
1676 if (bits)
|
yading@10
|
1677 *bits = curbits;
|
yading@10
|
1678 return cost * lambda + curbits;
|
yading@10
|
1679 }
|
yading@10
|
1680
|
yading@10
|
1681 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
|
yading@10
|
1682 PutBitContext *pb, const float *in,
|
yading@10
|
1683 const float *scaled, int size, int scale_idx,
|
yading@10
|
1684 int cb, const float lambda, const float uplim,
|
yading@10
|
1685 int *bits)
|
yading@10
|
1686 {
|
yading@10
|
1687 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1688 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
|
yading@10
|
1689 int i;
|
yading@10
|
1690 float cost = 0;
|
yading@10
|
1691 int qc1, qc2, qc3, qc4;
|
yading@10
|
1692 int curbits = 0;
|
yading@10
|
1693
|
yading@10
|
1694 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1695 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
|
yading@10
|
1696
|
yading@10
|
1697 for (i = 0; i < size; i += 4) {
|
yading@10
|
1698 const float *vec, *vec2;
|
yading@10
|
1699 int curidx, curidx2, sign1, count1, sign2, count2;
|
yading@10
|
1700 int *in_int = (int *)&in[i];
|
yading@10
|
1701 float *in_pos = (float *)&in[i];
|
yading@10
|
1702 float di0, di1, di2, di3;
|
yading@10
|
1703
|
yading@10
|
1704 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1705 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1706 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1707 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1708
|
yading@10
|
1709 __asm__ volatile (
|
yading@10
|
1710 ".set push \n\t"
|
yading@10
|
1711 ".set noreorder \n\t"
|
yading@10
|
1712
|
yading@10
|
1713 "ori $t4, $zero, 7 \n\t"
|
yading@10
|
1714 "ori %[sign1], $zero, 0 \n\t"
|
yading@10
|
1715 "ori %[sign2], $zero, 0 \n\t"
|
yading@10
|
1716 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1717 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1718 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1719 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1720 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1721 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1722 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1723 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1724 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
1725 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
1726 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
1727 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
1728 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
1729 "movn %[sign1], $t0, %[qc1] \n\t"
|
yading@10
|
1730 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
1731 "movn %[sign2], $t2, %[qc3] \n\t"
|
yading@10
|
1732 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
1733 "sll $t0, %[sign1], 1 \n\t"
|
yading@10
|
1734 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
1735 "movn %[sign1], $t0, %[qc2] \n\t"
|
yading@10
|
1736 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
1737 "sll $t0, %[sign2], 1 \n\t"
|
yading@10
|
1738 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
1739 "movn %[sign2], $t0, %[qc4] \n\t"
|
yading@10
|
1740 "slt %[count1], $zero, %[qc1] \n\t"
|
yading@10
|
1741 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
1742 "slt %[count2], $zero, %[qc3] \n\t"
|
yading@10
|
1743 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
1744 "addu %[count1], %[count1], $t1 \n\t"
|
yading@10
|
1745 "addu %[count2], %[count2], $t2 \n\t"
|
yading@10
|
1746
|
yading@10
|
1747 ".set pop \n\t"
|
yading@10
|
1748
|
yading@10
|
1749 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1750 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
1751 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
|
yading@10
|
1752 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
|
yading@10
|
1753 : [in_int]"r"(in_int)
|
yading@10
|
1754 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
1755 "memory"
|
yading@10
|
1756 );
|
yading@10
|
1757
|
yading@10
|
1758 curidx = 8 * qc1;
|
yading@10
|
1759 curidx += qc2;
|
yading@10
|
1760
|
yading@10
|
1761 curidx2 = 8 * qc3;
|
yading@10
|
1762 curidx2 += qc4;
|
yading@10
|
1763
|
yading@10
|
1764 curbits += p_bits[curidx];
|
yading@10
|
1765 curbits += upair7_sign_bits[curidx];
|
yading@10
|
1766 vec = &p_codes[curidx*2];
|
yading@10
|
1767
|
yading@10
|
1768 curbits += p_bits[curidx2];
|
yading@10
|
1769 curbits += upair7_sign_bits[curidx2];
|
yading@10
|
1770 vec2 = &p_codes[curidx2*2];
|
yading@10
|
1771
|
yading@10
|
1772 __asm__ volatile (
|
yading@10
|
1773 ".set push \n\t"
|
yading@10
|
1774 ".set noreorder \n\t"
|
yading@10
|
1775
|
yading@10
|
1776 "lwc1 %[di0], 0(%[in_pos]) \n\t"
|
yading@10
|
1777 "lwc1 %[di1], 4(%[in_pos]) \n\t"
|
yading@10
|
1778 "lwc1 %[di2], 8(%[in_pos]) \n\t"
|
yading@10
|
1779 "lwc1 %[di3], 12(%[in_pos]) \n\t"
|
yading@10
|
1780 "abs.s %[di0], %[di0] \n\t"
|
yading@10
|
1781 "abs.s %[di1], %[di1] \n\t"
|
yading@10
|
1782 "abs.s %[di2], %[di2] \n\t"
|
yading@10
|
1783 "abs.s %[di3], %[di3] \n\t"
|
yading@10
|
1784 "lwc1 $f0, 0(%[vec]) \n\t"
|
yading@10
|
1785 "lwc1 $f1, 4(%[vec]) \n\t"
|
yading@10
|
1786 "lwc1 $f2, 0(%[vec2]) \n\t"
|
yading@10
|
1787 "lwc1 $f3, 4(%[vec2]) \n\t"
|
yading@10
|
1788 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
|
yading@10
|
1789 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
|
yading@10
|
1790 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
|
yading@10
|
1791 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
|
yading@10
|
1792
|
yading@10
|
1793 ".set pop \n\t"
|
yading@10
|
1794
|
yading@10
|
1795 : [di0]"=&f"(di0), [di1]"=&f"(di1),
|
yading@10
|
1796 [di2]"=&f"(di2), [di3]"=&f"(di3)
|
yading@10
|
1797 : [in_pos]"r"(in_pos), [vec]"r"(vec),
|
yading@10
|
1798 [vec2]"r"(vec2), [IQ]"f"(IQ)
|
yading@10
|
1799 : "$f0", "$f1", "$f2", "$f3",
|
yading@10
|
1800 "memory"
|
yading@10
|
1801 );
|
yading@10
|
1802
|
yading@10
|
1803 cost += di0 * di0 + di1 * di1
|
yading@10
|
1804 + di2 * di2 + di3 * di3;
|
yading@10
|
1805 }
|
yading@10
|
1806
|
yading@10
|
1807 if (bits)
|
yading@10
|
1808 *bits = curbits;
|
yading@10
|
1809 return cost * lambda + curbits;
|
yading@10
|
1810 }
|
yading@10
|
1811
|
yading@10
|
1812 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
|
yading@10
|
1813 PutBitContext *pb, const float *in,
|
yading@10
|
1814 const float *scaled, int size, int scale_idx,
|
yading@10
|
1815 int cb, const float lambda, const float uplim,
|
yading@10
|
1816 int *bits)
|
yading@10
|
1817 {
|
yading@10
|
1818 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1819 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
|
yading@10
|
1820 int i;
|
yading@10
|
1821 float cost = 0;
|
yading@10
|
1822 int qc1, qc2, qc3, qc4;
|
yading@10
|
1823 int curbits = 0;
|
yading@10
|
1824
|
yading@10
|
1825 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1826 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
|
yading@10
|
1827
|
yading@10
|
1828 for (i = 0; i < size; i += 4) {
|
yading@10
|
1829 const float *vec, *vec2;
|
yading@10
|
1830 int curidx, curidx2;
|
yading@10
|
1831 int sign1, count1, sign2, count2;
|
yading@10
|
1832 int *in_int = (int *)&in[i];
|
yading@10
|
1833 float *in_pos = (float *)&in[i];
|
yading@10
|
1834 float di0, di1, di2, di3;
|
yading@10
|
1835
|
yading@10
|
1836 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1837 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1838 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1839 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1840
|
yading@10
|
1841 __asm__ volatile (
|
yading@10
|
1842 ".set push \n\t"
|
yading@10
|
1843 ".set noreorder \n\t"
|
yading@10
|
1844
|
yading@10
|
1845 "ori $t4, $zero, 12 \n\t"
|
yading@10
|
1846 "ori %[sign1], $zero, 0 \n\t"
|
yading@10
|
1847 "ori %[sign2], $zero, 0 \n\t"
|
yading@10
|
1848 "slt $t0, $t4, %[qc1] \n\t"
|
yading@10
|
1849 "slt $t1, $t4, %[qc2] \n\t"
|
yading@10
|
1850 "slt $t2, $t4, %[qc3] \n\t"
|
yading@10
|
1851 "slt $t3, $t4, %[qc4] \n\t"
|
yading@10
|
1852 "movn %[qc1], $t4, $t0 \n\t"
|
yading@10
|
1853 "movn %[qc2], $t4, $t1 \n\t"
|
yading@10
|
1854 "movn %[qc3], $t4, $t2 \n\t"
|
yading@10
|
1855 "movn %[qc4], $t4, $t3 \n\t"
|
yading@10
|
1856 "lw $t0, 0(%[in_int]) \n\t"
|
yading@10
|
1857 "lw $t1, 4(%[in_int]) \n\t"
|
yading@10
|
1858 "lw $t2, 8(%[in_int]) \n\t"
|
yading@10
|
1859 "lw $t3, 12(%[in_int]) \n\t"
|
yading@10
|
1860 "slt $t0, $t0, $zero \n\t"
|
yading@10
|
1861 "movn %[sign1], $t0, %[qc1] \n\t"
|
yading@10
|
1862 "slt $t2, $t2, $zero \n\t"
|
yading@10
|
1863 "movn %[sign2], $t2, %[qc3] \n\t"
|
yading@10
|
1864 "slt $t1, $t1, $zero \n\t"
|
yading@10
|
1865 "sll $t0, %[sign1], 1 \n\t"
|
yading@10
|
1866 "or $t0, $t0, $t1 \n\t"
|
yading@10
|
1867 "movn %[sign1], $t0, %[qc2] \n\t"
|
yading@10
|
1868 "slt $t3, $t3, $zero \n\t"
|
yading@10
|
1869 "sll $t0, %[sign2], 1 \n\t"
|
yading@10
|
1870 "or $t0, $t0, $t3 \n\t"
|
yading@10
|
1871 "movn %[sign2], $t0, %[qc4] \n\t"
|
yading@10
|
1872 "slt %[count1], $zero, %[qc1] \n\t"
|
yading@10
|
1873 "slt $t1, $zero, %[qc2] \n\t"
|
yading@10
|
1874 "slt %[count2], $zero, %[qc3] \n\t"
|
yading@10
|
1875 "slt $t2, $zero, %[qc4] \n\t"
|
yading@10
|
1876 "addu %[count1], %[count1], $t1 \n\t"
|
yading@10
|
1877 "addu %[count2], %[count2], $t2 \n\t"
|
yading@10
|
1878
|
yading@10
|
1879 ".set pop \n\t"
|
yading@10
|
1880
|
yading@10
|
1881 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1882 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
1883 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
|
yading@10
|
1884 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
|
yading@10
|
1885 : [in_int]"r"(in_int)
|
yading@10
|
1886 : "t0", "t1", "t2", "t3", "t4",
|
yading@10
|
1887 "memory"
|
yading@10
|
1888 );
|
yading@10
|
1889
|
yading@10
|
1890 curidx = 13 * qc1;
|
yading@10
|
1891 curidx += qc2;
|
yading@10
|
1892
|
yading@10
|
1893 curidx2 = 13 * qc3;
|
yading@10
|
1894 curidx2 += qc4;
|
yading@10
|
1895
|
yading@10
|
1896 curbits += p_bits[curidx];
|
yading@10
|
1897 curbits += p_bits[curidx2];
|
yading@10
|
1898 curbits += upair12_sign_bits[curidx];
|
yading@10
|
1899 curbits += upair12_sign_bits[curidx2];
|
yading@10
|
1900 vec = &p_codes[curidx*2];
|
yading@10
|
1901 vec2 = &p_codes[curidx2*2];
|
yading@10
|
1902
|
yading@10
|
1903 __asm__ volatile (
|
yading@10
|
1904 ".set push \n\t"
|
yading@10
|
1905 ".set noreorder \n\t"
|
yading@10
|
1906
|
yading@10
|
1907 "lwc1 %[di0], 0(%[in_pos]) \n\t"
|
yading@10
|
1908 "lwc1 %[di1], 4(%[in_pos]) \n\t"
|
yading@10
|
1909 "lwc1 %[di2], 8(%[in_pos]) \n\t"
|
yading@10
|
1910 "lwc1 %[di3], 12(%[in_pos]) \n\t"
|
yading@10
|
1911 "abs.s %[di0], %[di0] \n\t"
|
yading@10
|
1912 "abs.s %[di1], %[di1] \n\t"
|
yading@10
|
1913 "abs.s %[di2], %[di2] \n\t"
|
yading@10
|
1914 "abs.s %[di3], %[di3] \n\t"
|
yading@10
|
1915 "lwc1 $f0, 0(%[vec]) \n\t"
|
yading@10
|
1916 "lwc1 $f1, 4(%[vec]) \n\t"
|
yading@10
|
1917 "lwc1 $f2, 0(%[vec2]) \n\t"
|
yading@10
|
1918 "lwc1 $f3, 4(%[vec2]) \n\t"
|
yading@10
|
1919 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
|
yading@10
|
1920 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
|
yading@10
|
1921 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
|
yading@10
|
1922 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
|
yading@10
|
1923
|
yading@10
|
1924 ".set pop \n\t"
|
yading@10
|
1925
|
yading@10
|
1926 : [di0]"=&f"(di0), [di1]"=&f"(di1),
|
yading@10
|
1927 [di2]"=&f"(di2), [di3]"=&f"(di3)
|
yading@10
|
1928 : [in_pos]"r"(in_pos), [vec]"r"(vec),
|
yading@10
|
1929 [vec2]"r"(vec2), [IQ]"f"(IQ)
|
yading@10
|
1930 : "$f0", "$f1", "$f2", "$f3",
|
yading@10
|
1931 "memory"
|
yading@10
|
1932 );
|
yading@10
|
1933
|
yading@10
|
1934 cost += di0 * di0 + di1 * di1
|
yading@10
|
1935 + di2 * di2 + di3 * di3;
|
yading@10
|
1936 }
|
yading@10
|
1937
|
yading@10
|
1938 if (bits)
|
yading@10
|
1939 *bits = curbits;
|
yading@10
|
1940 return cost * lambda + curbits;
|
yading@10
|
1941 }
|
yading@10
|
1942
|
yading@10
|
1943 static float get_band_cost_ESC_mips(struct AACEncContext *s,
|
yading@10
|
1944 PutBitContext *pb, const float *in,
|
yading@10
|
1945 const float *scaled, int size, int scale_idx,
|
yading@10
|
1946 int cb, const float lambda, const float uplim,
|
yading@10
|
1947 int *bits)
|
yading@10
|
1948 {
|
yading@10
|
1949 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
|
yading@10
|
1950 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
|
yading@10
|
1951 const float CLIPPED_ESCAPE = 165140.0f * IQ;
|
yading@10
|
1952 int i;
|
yading@10
|
1953 float cost = 0;
|
yading@10
|
1954 int qc1, qc2, qc3, qc4;
|
yading@10
|
1955 int curbits = 0;
|
yading@10
|
1956
|
yading@10
|
1957 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
|
yading@10
|
1958 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
|
yading@10
|
1959
|
yading@10
|
1960 for (i = 0; i < size; i += 4) {
|
yading@10
|
1961 const float *vec, *vec2;
|
yading@10
|
1962 int curidx, curidx2;
|
yading@10
|
1963 float t1, t2, t3, t4;
|
yading@10
|
1964 float di1, di2, di3, di4;
|
yading@10
|
1965 int cond0, cond1, cond2, cond3;
|
yading@10
|
1966 int c1, c2, c3, c4;
|
yading@10
|
1967
|
yading@10
|
1968 qc1 = scaled[i ] * Q34 + 0.4054f;
|
yading@10
|
1969 qc2 = scaled[i+1] * Q34 + 0.4054f;
|
yading@10
|
1970 qc3 = scaled[i+2] * Q34 + 0.4054f;
|
yading@10
|
1971 qc4 = scaled[i+3] * Q34 + 0.4054f;
|
yading@10
|
1972
|
yading@10
|
1973 __asm__ volatile (
|
yading@10
|
1974 ".set push \n\t"
|
yading@10
|
1975 ".set noreorder \n\t"
|
yading@10
|
1976
|
yading@10
|
1977 "ori $t4, $zero, 15 \n\t"
|
yading@10
|
1978 "ori $t5, $zero, 16 \n\t"
|
yading@10
|
1979 "shll_s.w %[c1], %[qc1], 18 \n\t"
|
yading@10
|
1980 "shll_s.w %[c2], %[qc2], 18 \n\t"
|
yading@10
|
1981 "shll_s.w %[c3], %[qc3], 18 \n\t"
|
yading@10
|
1982 "shll_s.w %[c4], %[qc4], 18 \n\t"
|
yading@10
|
1983 "srl %[c1], %[c1], 18 \n\t"
|
yading@10
|
1984 "srl %[c2], %[c2], 18 \n\t"
|
yading@10
|
1985 "srl %[c3], %[c3], 18 \n\t"
|
yading@10
|
1986 "srl %[c4], %[c4], 18 \n\t"
|
yading@10
|
1987 "slt %[cond0], $t4, %[qc1] \n\t"
|
yading@10
|
1988 "slt %[cond1], $t4, %[qc2] \n\t"
|
yading@10
|
1989 "slt %[cond2], $t4, %[qc3] \n\t"
|
yading@10
|
1990 "slt %[cond3], $t4, %[qc4] \n\t"
|
yading@10
|
1991 "movn %[qc1], $t5, %[cond0] \n\t"
|
yading@10
|
1992 "movn %[qc2], $t5, %[cond1] \n\t"
|
yading@10
|
1993 "movn %[qc3], $t5, %[cond2] \n\t"
|
yading@10
|
1994 "movn %[qc4], $t5, %[cond3] \n\t"
|
yading@10
|
1995
|
yading@10
|
1996 ".set pop \n\t"
|
yading@10
|
1997
|
yading@10
|
1998 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
|
yading@10
|
1999 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
|
yading@10
|
2000 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
|
yading@10
|
2001 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
|
yading@10
|
2002 [c1]"=&r"(c1), [c2]"=&r"(c2),
|
yading@10
|
2003 [c3]"=&r"(c3), [c4]"=&r"(c4)
|
yading@10
|
2004 :
|
yading@10
|
2005 : "t4", "t5"
|
yading@10
|
2006 );
|
yading@10
|
2007
|
yading@10
|
2008 curidx = 17 * qc1;
|
yading@10
|
2009 curidx += qc2;
|
yading@10
|
2010
|
yading@10
|
2011 curidx2 = 17 * qc3;
|
yading@10
|
2012 curidx2 += qc4;
|
yading@10
|
2013
|
yading@10
|
2014 curbits += p_bits[curidx];
|
yading@10
|
2015 curbits += esc_sign_bits[curidx];
|
yading@10
|
2016 vec = &p_codes[curidx*2];
|
yading@10
|
2017
|
yading@10
|
2018 curbits += p_bits[curidx2];
|
yading@10
|
2019 curbits += esc_sign_bits[curidx2];
|
yading@10
|
2020 vec2 = &p_codes[curidx2*2];
|
yading@10
|
2021
|
yading@10
|
2022 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
|
yading@10
|
2023 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
|
yading@10
|
2024 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
|
yading@10
|
2025 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
|
yading@10
|
2026
|
yading@10
|
2027 t1 = fabsf(in[i ]);
|
yading@10
|
2028 t2 = fabsf(in[i+1]);
|
yading@10
|
2029 t3 = fabsf(in[i+2]);
|
yading@10
|
2030 t4 = fabsf(in[i+3]);
|
yading@10
|
2031
|
yading@10
|
2032 if (cond0) {
|
yading@10
|
2033 if (t1 >= CLIPPED_ESCAPE) {
|
yading@10
|
2034 di1 = t1 - CLIPPED_ESCAPE;
|
yading@10
|
2035 } else {
|
yading@10
|
2036 di1 = t1 - c1 * cbrtf(c1) * IQ;
|
yading@10
|
2037 }
|
yading@10
|
2038 } else
|
yading@10
|
2039 di1 = t1 - vec[0] * IQ;
|
yading@10
|
2040
|
yading@10
|
2041 if (cond1) {
|
yading@10
|
2042 if (t2 >= CLIPPED_ESCAPE) {
|
yading@10
|
2043 di2 = t2 - CLIPPED_ESCAPE;
|
yading@10
|
2044 } else {
|
yading@10
|
2045 di2 = t2 - c2 * cbrtf(c2) * IQ;
|
yading@10
|
2046 }
|
yading@10
|
2047 } else
|
yading@10
|
2048 di2 = t2 - vec[1] * IQ;
|
yading@10
|
2049
|
yading@10
|
2050 if (cond2) {
|
yading@10
|
2051 if (t3 >= CLIPPED_ESCAPE) {
|
yading@10
|
2052 di3 = t3 - CLIPPED_ESCAPE;
|
yading@10
|
2053 } else {
|
yading@10
|
2054 di3 = t3 - c3 * cbrtf(c3) * IQ;
|
yading@10
|
2055 }
|
yading@10
|
2056 } else
|
yading@10
|
2057 di3 = t3 - vec2[0] * IQ;
|
yading@10
|
2058
|
yading@10
|
2059 if (cond3) {
|
yading@10
|
2060 if (t4 >= CLIPPED_ESCAPE) {
|
yading@10
|
2061 di4 = t4 - CLIPPED_ESCAPE;
|
yading@10
|
2062 } else {
|
yading@10
|
2063 di4 = t4 - c4 * cbrtf(c4) * IQ;
|
yading@10
|
2064 }
|
yading@10
|
2065 } else
|
yading@10
|
2066 di4 = t4 - vec2[1]*IQ;
|
yading@10
|
2067
|
yading@10
|
2068 cost += di1 * di1 + di2 * di2
|
yading@10
|
2069 + di3 * di3 + di4 * di4;
|
yading@10
|
2070 }
|
yading@10
|
2071
|
yading@10
|
2072 if (bits)
|
yading@10
|
2073 *bits = curbits;
|
yading@10
|
2074 return cost * lambda + curbits;
|
yading@10
|
2075 }
|
yading@10
|
2076
|
yading@10
|
2077 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
|
yading@10
|
2078 PutBitContext *pb, const float *in,
|
yading@10
|
2079 const float *scaled, int size, int scale_idx,
|
yading@10
|
2080 int cb, const float lambda, const float uplim,
|
yading@10
|
2081 int *bits) = {
|
yading@10
|
2082 get_band_cost_ZERO_mips,
|
yading@10
|
2083 get_band_cost_SQUAD_mips,
|
yading@10
|
2084 get_band_cost_SQUAD_mips,
|
yading@10
|
2085 get_band_cost_UQUAD_mips,
|
yading@10
|
2086 get_band_cost_UQUAD_mips,
|
yading@10
|
2087 get_band_cost_SPAIR_mips,
|
yading@10
|
2088 get_band_cost_SPAIR_mips,
|
yading@10
|
2089 get_band_cost_UPAIR7_mips,
|
yading@10
|
2090 get_band_cost_UPAIR7_mips,
|
yading@10
|
2091 get_band_cost_UPAIR12_mips,
|
yading@10
|
2092 get_band_cost_UPAIR12_mips,
|
yading@10
|
2093 get_band_cost_ESC_mips,
|
yading@10
|
2094 };
|
yading@10
|
2095
|
yading@10
|
/**
 * Call the cost routine stored in get_band_cost_arr for codebook cb.
 * cb is expanded twice (as table index and as argument), so it must not
 * have side effects. No range check is performed on cb.
 */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits) \
    get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits)
|
yading@10
|
2102
|
yading@10
|
/**
 * Cost of one band for codebook cb, evaluated without a bitstream writer.
 *
 * Forwards all arguments to get_band_cost() with a NULL PutBitContext.
 *
 * @param bits passed through to the selected cost routine
 * @return the value returned by the selected cost routine
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits)
{
    const float rd_cost = get_band_cost(s, NULL, in, scaled, size, scale_idx,
                                        cb, lambda, uplim, bits);

    return rd_cost;
}
|
yading@10
|
2110
|
yading@10
|
2111 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
|
yading@10
|
2112 AACEncContext *s,
|
yading@10
|
2113 SingleChannelElement *sce,
|
yading@10
|
2114 const float lambda)
|
yading@10
|
2115 {
|
yading@10
|
2116 int start = 0, i, w, w2, g;
|
yading@10
|
2117 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
|
yading@10
|
2118 float dists[128] = { 0 }, uplims[128];
|
yading@10
|
2119 float maxvals[128];
|
yading@10
|
2120 int fflag, minscaler;
|
yading@10
|
2121 int its = 0;
|
yading@10
|
2122 int allz = 0;
|
yading@10
|
2123 float minthr = INFINITY;
|
yading@10
|
2124
|
yading@10
|
2125 destbits = FFMIN(destbits, 5800);
|
yading@10
|
2126 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
yading@10
|
2127 for (g = 0; g < sce->ics.num_swb; g++) {
|
yading@10
|
2128 int nz = 0;
|
yading@10
|
2129 float uplim = 0.0f;
|
yading@10
|
2130 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
yading@10
|
2131 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
|
yading@10
|
2132 uplim += band->threshold;
|
yading@10
|
2133 if (band->energy <= band->threshold || band->threshold == 0.0f) {
|
yading@10
|
2134 sce->zeroes[(w+w2)*16+g] = 1;
|
yading@10
|
2135 continue;
|
yading@10
|
2136 }
|
yading@10
|
2137 nz = 1;
|
yading@10
|
2138 }
|
yading@10
|
2139 uplims[w*16+g] = uplim *512;
|
yading@10
|
2140 sce->zeroes[w*16+g] = !nz;
|
yading@10
|
2141 if (nz)
|
yading@10
|
2142 minthr = FFMIN(minthr, uplim);
|
yading@10
|
2143 allz |= nz;
|
yading@10
|
2144 }
|
yading@10
|
2145 }
|
yading@10
|
2146 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
yading@10
|
2147 for (g = 0; g < sce->ics.num_swb; g++) {
|
yading@10
|
2148 if (sce->zeroes[w*16+g]) {
|
yading@10
|
2149 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
|
yading@10
|
2150 continue;
|
yading@10
|
2151 }
|
yading@10
|
2152 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
|
yading@10
|
2153 }
|
yading@10
|
2154 }
|
yading@10
|
2155
|
yading@10
|
2156 if (!allz)
|
yading@10
|
2157 return;
|
yading@10
|
2158 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
|
yading@10
|
2159
|
yading@10
|
2160 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
yading@10
|
2161 start = w*128;
|
yading@10
|
2162 for (g = 0; g < sce->ics.num_swb; g++) {
|
yading@10
|
2163 const float *scaled = s->scoefs + start;
|
yading@10
|
2164 maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
|
yading@10
|
2165 start += sce->ics.swb_sizes[g];
|
yading@10
|
2166 }
|
yading@10
|
2167 }
|
yading@10
|
2168
|
yading@10
|
2169 do {
|
yading@10
|
2170 int tbits, qstep;
|
yading@10
|
2171 minscaler = sce->sf_idx[0];
|
yading@10
|
2172 qstep = its ? 1 : 32;
|
yading@10
|
2173 do {
|
yading@10
|
2174 int prev = -1;
|
yading@10
|
2175 tbits = 0;
|
yading@10
|
2176 fflag = 0;
|
yading@10
|
2177
|
yading@10
|
2178 if (qstep > 1) {
|
yading@10
|
2179 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
yading@10
|
2180 start = w*128;
|
yading@10
|
2181 for (g = 0; g < sce->ics.num_swb; g++) {
|
yading@10
|
2182 const float *coefs = sce->coeffs + start;
|
yading@10
|
2183 const float *scaled = s->scoefs + start;
|
yading@10
|
2184 int bits = 0;
|
yading@10
|
2185 int cb;
|
yading@10
|
2186
|
yading@10
|
2187 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
|
yading@10
|
2188 start += sce->ics.swb_sizes[g];
|
yading@10
|
2189 continue;
|
yading@10
|
2190 }
|
yading@10
|
2191 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
|
yading@10
|
2192 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
|
yading@10
|
2193 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
yading@10
|
2194 int b;
|
yading@10
|
2195 bits += quantize_band_cost_bits(s, coefs + w2*128,
|
yading@10
|
2196 scaled + w2*128,
|
yading@10
|
2197 sce->ics.swb_sizes[g],
|
yading@10
|
2198 sce->sf_idx[w*16+g],
|
yading@10
|
2199 cb,
|
yading@10
|
2200 1.0f,
|
yading@10
|
2201 INFINITY,
|
yading@10
|
2202 &b);
|
yading@10
|
2203 }
|
yading@10
|
2204 if (prev != -1) {
|
yading@10
|
2205 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
|
yading@10
|
2206 }
|
yading@10
|
2207 tbits += bits;
|
yading@10
|
2208 start += sce->ics.swb_sizes[g];
|
yading@10
|
2209 prev = sce->sf_idx[w*16+g];
|
yading@10
|
2210 }
|
yading@10
|
2211 }
|
yading@10
|
2212 }
|
yading@10
|
2213 else {
|
yading@10
|
2214 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
yading@10
|
2215 start = w*128;
|
yading@10
|
2216 for (g = 0; g < sce->ics.num_swb; g++) {
|
yading@10
|
2217 const float *coefs = sce->coeffs + start;
|
yading@10
|
2218 const float *scaled = s->scoefs + start;
|
yading@10
|
2219 int bits = 0;
|
yading@10
|
2220 int cb;
|
yading@10
|
2221 float dist = 0.0f;
|
yading@10
|
2222
|
yading@10
|
2223 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
|
yading@10
|
2224 start += sce->ics.swb_sizes[g];
|
yading@10
|
2225 continue;
|
yading@10
|
2226 }
|
yading@10
|
2227 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
|
yading@10
|
2228 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
|
yading@10
|
2229 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
yading@10
|
2230 int b;
|
yading@10
|
2231 dist += quantize_band_cost(s, coefs + w2*128,
|
yading@10
|
2232 scaled + w2*128,
|
yading@10
|
2233 sce->ics.swb_sizes[g],
|
yading@10
|
2234 sce->sf_idx[w*16+g],
|
yading@10
|
2235 cb,
|
yading@10
|
2236 1.0f,
|
yading@10
|
2237 INFINITY,
|
yading@10
|
2238 &b);
|
yading@10
|
2239 bits += b;
|
yading@10
|
2240 }
|
yading@10
|
2241 dists[w*16+g] = dist - bits;
|
yading@10
|
2242 if (prev != -1) {
|
yading@10
|
2243 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
|
yading@10
|
2244 }
|
yading@10
|
2245 tbits += bits;
|
yading@10
|
2246 start += sce->ics.swb_sizes[g];
|
yading@10
|
2247 prev = sce->sf_idx[w*16+g];
|
yading@10
|
2248 }
|
yading@10
|
2249 }
|
yading@10
|
2250 }
|
yading@10
|
2251 if (tbits > destbits) {
|
yading@10
|
2252 for (i = 0; i < 128; i++)
|
yading@10
|
2253 if (sce->sf_idx[i] < 218 - qstep)
|
yading@10
|
2254 sce->sf_idx[i] += qstep;
|
yading@10
|
2255 } else {
|
yading@10
|
2256 for (i = 0; i < 128; i++)
|
yading@10
|
2257 if (sce->sf_idx[i] > 60 - qstep)
|
yading@10
|
2258 sce->sf_idx[i] -= qstep;
|
yading@10
|
2259 }
|
yading@10
|
2260 qstep >>= 1;
|
yading@10
|
2261 if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
|
yading@10
|
2262 qstep = 1;
|
yading@10
|
2263 } while (qstep);
|
yading@10
|
2264
|
yading@10
|
2265 fflag = 0;
|
yading@10
|
2266 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
|
yading@10
|
2267 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
yading@10
|
2268 for (g = 0; g < sce->ics.num_swb; g++) {
|
yading@10
|
2269 int prevsc = sce->sf_idx[w*16+g];
|
yading@10
|
2270 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
|
yading@10
|
2271 if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
|
yading@10
|
2272 sce->sf_idx[w*16+g]--;
|
yading@10
|
2273 else
|
yading@10
|
2274 sce->sf_idx[w*16+g]-=2;
|
yading@10
|
2275 }
|
yading@10
|
2276 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
|
yading@10
|
2277 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
|
yading@10
|
2278 if (sce->sf_idx[w*16+g] != prevsc)
|
yading@10
|
2279 fflag = 1;
|
yading@10
|
2280 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
|
yading@10
|
2281 }
|
yading@10
|
2282 }
|
yading@10
|
2283 its++;
|
yading@10
|
2284 } while (fflag && its < 10);
|
yading@10
|
2285 }
|
yading@10
|
2286
|
yading@10
|
2287 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe,
|
yading@10
|
2288 const float lambda)
|
yading@10
|
2289 {
|
yading@10
|
2290 int start = 0, i, w, w2, g;
|
yading@10
|
2291 float M[128], S[128];
|
yading@10
|
2292 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
|
yading@10
|
2293 SingleChannelElement *sce0 = &cpe->ch[0];
|
yading@10
|
2294 SingleChannelElement *sce1 = &cpe->ch[1];
|
yading@10
|
2295 if (!cpe->common_window)
|
yading@10
|
2296 return;
|
yading@10
|
2297 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
|
yading@10
|
2298 for (g = 0; g < sce0->ics.num_swb; g++) {
|
yading@10
|
2299 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
|
yading@10
|
2300 float dist1 = 0.0f, dist2 = 0.0f;
|
yading@10
|
2301 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
|
yading@10
|
2302 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
|
yading@10
|
2303 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
|
yading@10
|
2304 float minthr = FFMIN(band0->threshold, band1->threshold);
|
yading@10
|
2305 float maxthr = FFMAX(band0->threshold, band1->threshold);
|
yading@10
|
2306 for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
|
yading@10
|
2307 M[i ] = (sce0->coeffs[start+w2*128+i ]
|
yading@10
|
2308 + sce1->coeffs[start+w2*128+i ]) * 0.5;
|
yading@10
|
2309 M[i+1] = (sce0->coeffs[start+w2*128+i+1]
|
yading@10
|
2310 + sce1->coeffs[start+w2*128+i+1]) * 0.5;
|
yading@10
|
2311 M[i+2] = (sce0->coeffs[start+w2*128+i+2]
|
yading@10
|
2312 + sce1->coeffs[start+w2*128+i+2]) * 0.5;
|
yading@10
|
2313 M[i+3] = (sce0->coeffs[start+w2*128+i+3]
|
yading@10
|
2314 + sce1->coeffs[start+w2*128+i+3]) * 0.5;
|
yading@10
|
2315
|
yading@10
|
2316 S[i ] = M[i ]
|
yading@10
|
2317 - sce1->coeffs[start+w2*128+i ];
|
yading@10
|
2318 S[i+1] = M[i+1]
|
yading@10
|
2319 - sce1->coeffs[start+w2*128+i+1];
|
yading@10
|
2320 S[i+2] = M[i+2]
|
yading@10
|
2321 - sce1->coeffs[start+w2*128+i+2];
|
yading@10
|
2322 S[i+3] = M[i+3]
|
yading@10
|
2323 - sce1->coeffs[start+w2*128+i+3];
|
yading@10
|
2324 }
|
yading@10
|
2325 abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
|
yading@10
|
2326 abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
|
yading@10
|
2327 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
|
yading@10
|
2328 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
|
yading@10
|
2329 dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
|
yading@10
|
2330 L34,
|
yading@10
|
2331 sce0->ics.swb_sizes[g],
|
yading@10
|
2332 sce0->sf_idx[(w+w2)*16+g],
|
yading@10
|
2333 sce0->band_type[(w+w2)*16+g],
|
yading@10
|
2334 lambda / band0->threshold, INFINITY, NULL);
|
yading@10
|
2335 dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
|
yading@10
|
2336 R34,
|
yading@10
|
2337 sce1->ics.swb_sizes[g],
|
yading@10
|
2338 sce1->sf_idx[(w+w2)*16+g],
|
yading@10
|
2339 sce1->band_type[(w+w2)*16+g],
|
yading@10
|
2340 lambda / band1->threshold, INFINITY, NULL);
|
yading@10
|
2341 dist2 += quantize_band_cost(s, M,
|
yading@10
|
2342 M34,
|
yading@10
|
2343 sce0->ics.swb_sizes[g],
|
yading@10
|
2344 sce0->sf_idx[(w+w2)*16+g],
|
yading@10
|
2345 sce0->band_type[(w+w2)*16+g],
|
yading@10
|
2346 lambda / maxthr, INFINITY, NULL);
|
yading@10
|
2347 dist2 += quantize_band_cost(s, S,
|
yading@10
|
2348 S34,
|
yading@10
|
2349 sce1->ics.swb_sizes[g],
|
yading@10
|
2350 sce1->sf_idx[(w+w2)*16+g],
|
yading@10
|
2351 sce1->band_type[(w+w2)*16+g],
|
yading@10
|
2352 lambda / minthr, INFINITY, NULL);
|
yading@10
|
2353 }
|
yading@10
|
2354 cpe->ms_mask[w*16+g] = dist2 < dist1;
|
yading@10
|
2355 }
|
yading@10
|
2356 start += sce0->ics.swb_sizes[g];
|
yading@10
|
2357 }
|
yading@10
|
2358 }
|
yading@10
|
2359 }
|
yading@10
|
2360 #endif /*HAVE_MIPSFPU */
|
yading@10
|
2361
|
yading@10
|
2362 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
|
yading@10
|
2363 int win, int group_len, const float lambda)
|
yading@10
|
2364 {
|
yading@10
|
2365 BandCodingPath path[120][12];
|
yading@10
|
2366 int w, swb, cb, start, size;
|
yading@10
|
2367 int i, j;
|
yading@10
|
2368 const int max_sfb = sce->ics.max_sfb;
|
yading@10
|
2369 const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
|
yading@10
|
2370 const int run_esc = (1 << run_bits) - 1;
|
yading@10
|
2371 int idx, ppos, count;
|
yading@10
|
2372 int stackrun[120], stackcb[120], stack_len;
|
yading@10
|
2373 float next_minbits = INFINITY;
|
yading@10
|
2374 int next_mincb = 0;
|
yading@10
|
2375
|
yading@10
|
2376 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
|
yading@10
|
2377 start = win*128;
|
yading@10
|
2378 for (cb = 0; cb < 12; cb++) {
|
yading@10
|
2379 path[0][cb].cost = run_bits+4;
|
yading@10
|
2380 path[0][cb].prev_idx = -1;
|
yading@10
|
2381 path[0][cb].run = 0;
|
yading@10
|
2382 }
|
yading@10
|
2383 for (swb = 0; swb < max_sfb; swb++) {
|
yading@10
|
2384 size = sce->ics.swb_sizes[swb];
|
yading@10
|
2385 if (sce->zeroes[win*16 + swb]) {
|
yading@10
|
2386 float cost_stay_here = path[swb][0].cost;
|
yading@10
|
2387 float cost_get_here = next_minbits + run_bits + 4;
|
yading@10
|
2388 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
|
yading@10
|
2389 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
|
yading@10
|
2390 cost_stay_here += run_bits;
|
yading@10
|
2391 if (cost_get_here < cost_stay_here) {
|
yading@10
|
2392 path[swb+1][0].prev_idx = next_mincb;
|
yading@10
|
2393 path[swb+1][0].cost = cost_get_here;
|
yading@10
|
2394 path[swb+1][0].run = 1;
|
yading@10
|
2395 } else {
|
yading@10
|
2396 path[swb+1][0].prev_idx = 0;
|
yading@10
|
2397 path[swb+1][0].cost = cost_stay_here;
|
yading@10
|
2398 path[swb+1][0].run = path[swb][0].run + 1;
|
yading@10
|
2399 }
|
yading@10
|
2400 next_minbits = path[swb+1][0].cost;
|
yading@10
|
2401 next_mincb = 0;
|
yading@10
|
2402 for (cb = 1; cb < 12; cb++) {
|
yading@10
|
2403 path[swb+1][cb].cost = 61450;
|
yading@10
|
2404 path[swb+1][cb].prev_idx = -1;
|
yading@10
|
2405 path[swb+1][cb].run = 0;
|
yading@10
|
2406 }
|
yading@10
|
2407 } else {
|
yading@10
|
2408 float minbits = next_minbits;
|
yading@10
|
2409 int mincb = next_mincb;
|
yading@10
|
2410 int startcb = sce->band_type[win*16+swb];
|
yading@10
|
2411 next_minbits = INFINITY;
|
yading@10
|
2412 next_mincb = 0;
|
yading@10
|
2413 for (cb = 0; cb < startcb; cb++) {
|
yading@10
|
2414 path[swb+1][cb].cost = 61450;
|
yading@10
|
2415 path[swb+1][cb].prev_idx = -1;
|
yading@10
|
2416 path[swb+1][cb].run = 0;
|
yading@10
|
2417 }
|
yading@10
|
2418 for (cb = startcb; cb < 12; cb++) {
|
yading@10
|
2419 float cost_stay_here, cost_get_here;
|
yading@10
|
2420 float bits = 0.0f;
|
yading@10
|
2421 for (w = 0; w < group_len; w++) {
|
yading@10
|
2422 bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
|
yading@10
|
2423 s->scoefs + start + w*128, size,
|
yading@10
|
2424 sce->sf_idx[(win+w)*16+swb], cb,
|
yading@10
|
2425 0, INFINITY, NULL);
|
yading@10
|
2426 }
|
yading@10
|
2427 cost_stay_here = path[swb][cb].cost + bits;
|
yading@10
|
2428 cost_get_here = minbits + bits + run_bits + 4;
|
yading@10
|
2429 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
|
yading@10
|
2430 != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
|
yading@10
|
2431 cost_stay_here += run_bits;
|
yading@10
|
2432 if (cost_get_here < cost_stay_here) {
|
yading@10
|
2433 path[swb+1][cb].prev_idx = mincb;
|
yading@10
|
2434 path[swb+1][cb].cost = cost_get_here;
|
yading@10
|
2435 path[swb+1][cb].run = 1;
|
yading@10
|
2436 } else {
|
yading@10
|
2437 path[swb+1][cb].prev_idx = cb;
|
yading@10
|
2438 path[swb+1][cb].cost = cost_stay_here;
|
yading@10
|
2439 path[swb+1][cb].run = path[swb][cb].run + 1;
|
yading@10
|
2440 }
|
yading@10
|
2441 if (path[swb+1][cb].cost < next_minbits) {
|
yading@10
|
2442 next_minbits = path[swb+1][cb].cost;
|
yading@10
|
2443 next_mincb = cb;
|
yading@10
|
2444 }
|
yading@10
|
2445 }
|
yading@10
|
2446 }
|
yading@10
|
2447 start += sce->ics.swb_sizes[swb];
|
yading@10
|
2448 }
|
yading@10
|
2449
|
yading@10
|
2450 stack_len = 0;
|
yading@10
|
2451 idx = 0;
|
yading@10
|
2452 for (cb = 1; cb < 12; cb++)
|
yading@10
|
2453 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
|
yading@10
|
2454 idx = cb;
|
yading@10
|
2455 ppos = max_sfb;
|
yading@10
|
2456 while (ppos > 0) {
|
yading@10
|
2457 av_assert1(idx >= 0);
|
yading@10
|
2458 cb = idx;
|
yading@10
|
2459 stackrun[stack_len] = path[ppos][cb].run;
|
yading@10
|
2460 stackcb [stack_len] = cb;
|
yading@10
|
2461 idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
|
yading@10
|
2462 ppos -= path[ppos][cb].run;
|
yading@10
|
2463 stack_len++;
|
yading@10
|
2464 }
|
yading@10
|
2465
|
yading@10
|
2466 start = 0;
|
yading@10
|
2467 for (i = stack_len - 1; i >= 0; i--) {
|
yading@10
|
2468 put_bits(&s->pb, 4, stackcb[i]);
|
yading@10
|
2469 count = stackrun[i];
|
yading@10
|
2470 memset(sce->zeroes + win*16 + start, !stackcb[i], count);
|
yading@10
|
2471 for (j = 0; j < count; j++) {
|
yading@10
|
2472 sce->band_type[win*16 + start] = stackcb[i];
|
yading@10
|
2473 start++;
|
yading@10
|
2474 }
|
yading@10
|
2475 while (count >= run_esc) {
|
yading@10
|
2476 put_bits(&s->pb, run_bits, run_esc);
|
yading@10
|
2477 count -= run_esc;
|
yading@10
|
2478 }
|
yading@10
|
2479 put_bits(&s->pb, run_bits, count);
|
yading@10
|
2480 }
|
yading@10
|
2481 }
|
yading@10
|
2482 #endif /* HAVE_INLINE_ASM */
|
yading@10
|
2483
|
yading@10
|
2484 void ff_aac_coder_init_mips(AACEncContext *c) {
|
yading@10
|
2485 #if HAVE_INLINE_ASM
|
yading@10
|
2486 AACCoefficientsEncoder *e = c->coder;
|
yading@10
|
2487 int option = c->options.aac_coder;
|
yading@10
|
2488
|
yading@10
|
2489 if (option == 2) {
|
yading@10
|
2490 e->quantize_and_encode_band = quantize_and_encode_band_mips;
|
yading@10
|
2491 e->encode_window_bands_info = codebook_trellis_rate_mips;
|
yading@10
|
2492 #if HAVE_MIPSFPU
|
yading@10
|
2493 e->search_for_quantizers = search_for_quantizers_twoloop_mips;
|
yading@10
|
2494 e->search_for_ms = search_for_ms_mips;
|
yading@10
|
2495 #endif /* HAVE_MIPSFPU */
|
yading@10
|
2496 }
|
yading@10
|
2497 #endif /* HAVE_INLINE_ASM */
|
yading@10
|
2498 }
|