annotate ffmpeg/libavcodec/mips/aaccoder_mips.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Copyright (c) 2012
yading@10 3 * MIPS Technologies, Inc., California.
yading@10 4 *
yading@10 5 * Redistribution and use in source and binary forms, with or without
yading@10 6 * modification, are permitted provided that the following conditions
yading@10 7 * are met:
yading@10 8 * 1. Redistributions of source code must retain the above copyright
yading@10 9 * notice, this list of conditions and the following disclaimer.
yading@10 10 * 2. Redistributions in binary form must reproduce the above copyright
yading@10 11 * notice, this list of conditions and the following disclaimer in the
yading@10 12 * documentation and/or other materials provided with the distribution.
yading@10 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
yading@10 14 * contributors may be used to endorse or promote products derived from
yading@10 15 * this software without specific prior written permission.
yading@10 16 *
yading@10 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
yading@10 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
yading@10 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
yading@10 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
yading@10 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
yading@10 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
yading@10 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
yading@10 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
yading@10 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
yading@10 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
yading@10 27 * SUCH DAMAGE.
yading@10 28 *
yading@10 29 * Author: Stanislav Ocovaj (socovaj@mips.com)
yading@10 30 * Szabolcs Pal (sabolc@mips.com)
yading@10 31 *
yading@10 32 * AAC coefficients encoder optimized for MIPS floating-point architecture
yading@10 33 *
yading@10 34 * This file is part of FFmpeg.
yading@10 35 *
yading@10 36 * FFmpeg is free software; you can redistribute it and/or
yading@10 37 * modify it under the terms of the GNU Lesser General Public
yading@10 38 * License as published by the Free Software Foundation; either
yading@10 39 * version 2.1 of the License, or (at your option) any later version.
yading@10 40 *
yading@10 41 * FFmpeg is distributed in the hope that it will be useful,
yading@10 42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 44 * Lesser General Public License for more details.
yading@10 45 *
yading@10 46 * You should have received a copy of the GNU Lesser General Public
yading@10 47 * License along with FFmpeg; if not, write to the Free Software
yading@10 48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 49 */
yading@10 50
yading@10 51 /**
yading@10 52 * @file
yading@10 53 * Reference: libavcodec/aaccoder.c
yading@10 54 */
yading@10 55
yading@10 56 #include "libavutil/libm.h"
yading@10 57
yading@10 58 #include <float.h>
yading@10 59 #include "libavutil/mathematics.h"
yading@10 60 #include "libavcodec/avcodec.h"
yading@10 61 #include "libavcodec/put_bits.h"
yading@10 62 #include "libavcodec/aac.h"
yading@10 63 #include "libavcodec/aacenc.h"
yading@10 64 #include "libavcodec/aactab.h"
yading@10 65
yading@10 66 #if HAVE_INLINE_ASM
/**
 * Record holding a previous-entry index, a floating-point cost and a run
 * length.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * one node of the codebook/section search path as in libavcodec/aaccoder.c -
 * confirm against the users further down.
 */
yading@10 67 typedef struct BandCodingPath {
/* presumably the index of the predecessor entry - TODO confirm */
yading@10 68 int prev_idx;
/* presumably the accumulated cost of reaching this entry - TODO confirm */
yading@10 69 float cost;
/* presumably the length of the current run - TODO confirm */
yading@10 70 int run;
yading@10 71 } BandCodingPath;
yading@10 72
/*
 * 64-entry table: entries 0..30 are 5, entries 31..62 are 10 and entry 63
 * is 15.
 * NOTE(review): presumably the number of bits needed to code a run of the
 * given length with 5-bit fields (long windows) - confirm against the users.
 */
yading@10 73 static const uint8_t run_value_bits_long[64] = {
yading@10 74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
yading@10 75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
yading@10 76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
yading@10 77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
yading@10 78 };
yading@10 79
/*
 * 16-entry table: entries 0..6 are 3, entries 7..14 are 6 and entry 15 is 9.
 * NOTE(review): presumably the short-window counterpart of the table above
 * (3-bit fields) - confirm against the users.
 */
yading@10 80 static const uint8_t run_value_bits_short[16] = {
yading@10 81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
yading@10 82 };
yading@10 83
/* Selector over the two tables above: [0] is the long table, [1] the short one. */
yading@10 84 static const uint8_t *run_value_bits[2] = {
yading@10 85 run_value_bits_long, run_value_bits_short
yading@10 86 };
yading@10 87
/*
 * 81 = 3^4 entries. Entry i equals the number of non-zero digits in the
 * 4-digit base-3 representation of i (e.g. entry 0 is 0, entry 40 = 1111b3
 * is 4).
 * NOTE(review): presumably the number of sign bits that follow an unsigned
 * quad codeword with that index - not referenced in the visible part of the
 * file, confirm against the users.
 */
yading@10 88 static const uint8_t uquad_sign_bits[81] = {
yading@10 89 0, 1, 1, 1, 2, 2, 1, 2, 2,
yading@10 90 1, 2, 2, 2, 3, 3, 2, 3, 3,
yading@10 91 1, 2, 2, 2, 3, 3, 2, 3, 3,
yading@10 92 1, 2, 2, 2, 3, 3, 2, 3, 3,
yading@10 93 2, 3, 3, 3, 4, 4, 3, 4, 4,
yading@10 94 2, 3, 3, 3, 4, 4, 3, 4, 4,
yading@10 95 1, 2, 2, 2, 3, 3, 2, 3, 3,
yading@10 96 2, 3, 3, 3, 4, 4, 3, 4, 4,
yading@10 97 2, 3, 3, 3, 4, 4, 3, 4, 4
yading@10 98 };
yading@10 99
/*
 * 64 = 8 * 8 entries laid out as 8 rows of 8. Entry [8 * a + b] equals
 * (a != 0) + (b != 0), i.e. how many of the two components are non-zero.
 * NOTE(review): presumably the number of sign bits for an unsigned pair with
 * components in 0..7 - not referenced in the visible part of the file,
 * confirm against the users.
 */
yading@10 100 static const uint8_t upair7_sign_bits[64] = {
yading@10 101 0, 1, 1, 1, 1, 1, 1, 1,
yading@10 102 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 103 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 104 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 105 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 106 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 107 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 108 1, 2, 2, 2, 2, 2, 2, 2,
yading@10 109 };
yading@10 110
/*
 * 169 = 13 * 13 entries laid out as 13 rows of 13. Entry [13 * a + b] equals
 * (a != 0) + (b != 0), i.e. how many of the two components are non-zero.
 * NOTE(review): presumably the number of sign bits for an unsigned pair with
 * components in 0..12 - not referenced in the visible part of the file,
 * confirm against the users.
 */
yading@10 111 static const uint8_t upair12_sign_bits[169] = {
yading@10 112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
yading@10 113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
yading@10 125 };
yading@10 126
/*
 * 289 = 17 * 17 entries laid out as 17 rows of 17. Entry [17 * a + b] equals
 * (a != 0) + (b != 0), i.e. how many of the two components are non-zero.
 * NOTE(review): presumably the number of sign bits for a pair coded with the
 * escape codebook (components clamped to 0..16) - not referenced in the
 * visible part of the file, confirm against the users.
 */
yading@10 127 static const uint8_t esc_sign_bits[289] = {
yading@10 128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
yading@10 129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
yading@10 144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
yading@10 145 };
yading@10 146
yading@10 147 static void abs_pow34_v(float *out, const float *in, const int size) {
yading@10 148 #ifndef USE_REALLY_FULL_SEARCH
yading@10 149 int i;
yading@10 150 float a, b, c, d;
yading@10 151 float ax, bx, cx, dx;
yading@10 152
yading@10 153 for (i = 0; i < size; i += 4) {
yading@10 154 a = fabsf(in[i ]);
yading@10 155 b = fabsf(in[i+1]);
yading@10 156 c = fabsf(in[i+2]);
yading@10 157 d = fabsf(in[i+3]);
yading@10 158
yading@10 159 ax = sqrtf(a);
yading@10 160 bx = sqrtf(b);
yading@10 161 cx = sqrtf(c);
yading@10 162 dx = sqrtf(d);
yading@10 163
yading@10 164 a = a * ax;
yading@10 165 b = b * bx;
yading@10 166 c = c * cx;
yading@10 167 d = d * dx;
yading@10 168
yading@10 169 out[i ] = sqrtf(a);
yading@10 170 out[i+1] = sqrtf(b);
yading@10 171 out[i+2] = sqrtf(c);
yading@10 172 out[i+3] = sqrtf(d);
yading@10 173 }
yading@10 174 #endif /* USE_REALLY_FULL_SEARCH */
yading@10 175 }
yading@10 176
/**
 * Return the largest value found in the first swb_size entries of each of
 * group_len rows of `scaled`, where consecutive rows start 128 floats apart.
 *
 * The running maximum starts at 0.0f, so 0.0f is returned when no element is
 * visited or when every visited element is negative.
 *
 * @param group_len number of rows to scan
 * @param swb_size  number of entries scanned at the start of each row
 * @param scaled    base of the data; element (w, k) is scaled[w * 128 + k]
 * @return the maximum as described above
 */
static float find_max_val(int group_len, int swb_size, const float *scaled) {
    float peak = 0.0f;
    int win, k;

    for (win = 0; win < group_len; win++) {
        const int row_start = win * 128;

        for (k = 0; k < swb_size; k++) {
            const float v = scaled[row_start + k];

            /* keep the current peak only when it is strictly greater */
            peak = (peak > v) ? peak : v;
        }
    }
    return peak;
}
yading@10 187
yading@10 188 static int find_min_book(float maxval, int sf) {
yading@10 189 float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 190 float Q34 = sqrtf(Q * sqrtf(Q));
yading@10 191 int qmaxval, cb;
yading@10 192 qmaxval = maxval * Q34 + 0.4054f;
yading@10 193 if (qmaxval == 0) cb = 0;
yading@10 194 else if (qmaxval == 1) cb = 1;
yading@10 195 else if (qmaxval == 2) cb = 3;
yading@10 196 else if (qmaxval <= 4) cb = 5;
yading@10 197 else if (qmaxval <= 7) cb = 7;
yading@10 198 else if (qmaxval <= 12) cb = 9;
yading@10 199 else cb = 11;
yading@10 200 return cb;
yading@10 201 }
yading@10 202
yading@10 203 /**
yading@10 204 * Functions developed from template function and optimized for quantizing and encoding band
yading@10 205 */
/**
 * Quantize a band four coefficients at a time and write one codeword per
 * group of four to the bitstream.
 *
 * Each quantized magnitude is reduced to 0 or 1 and then negated when the
 * sign bit of the matching input float is set, so every component ends up in
 * {-1, 0, 1} before the table lookup.
 *
 * @param s         encoder context; s->scoefs is filled by abs_pow34_v()
 * @param pb        bit writer that receives the codewords
 * @param in        input coefficients, consumed in groups of 4
 * @param scaled    value passed in is ignored; it is reassigned to s->scoefs
 * @param size      number of coefficients; the loop advances by 4
 * @param scale_idx index used to fetch Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the tables at index cb-1 are used
 * @param lambda    not used by this function
 * @param uplim     not used by this function
 * @param bits      not used by this function
 */
yading@10 206 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
yading@10 207 PutBitContext *pb, const float *in,
yading@10 208 const float *scaled, int size, int scale_idx,
yading@10 209 int cb, const float lambda, const float uplim,
yading@10 210 int *bits)
yading@10 211 {
yading@10 212 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 213 int i;
yading@10 214 int qc1, qc2, qc3, qc4;
yading@10 215
/* code lengths and code values of the selected codebook */
yading@10 216 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 217 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
yading@10 218
yading@10 219 abs_pow34_v(s->scoefs, in, size);
yading@10 220 scaled = s->scoefs;
yading@10 221 for (i = 0; i < size; i += 4) {
yading@10 222 int curidx;
/* the asm below reads the raw bit patterns of in[i..i+3] through this pointer */
yading@10 223 int *in_int = (int *)&in[i];
yading@10 224
/* float-to-int conversion truncates the scaled value plus 0.4054f */
yading@10 225 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 226 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 227 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 228 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 229
yading@10 230 __asm__ volatile (
yading@10 231 ".set push \n\t"
yading@10 232 ".set noreorder \n\t"
yading@10 233
/* qcN = (0 < qcN) ? 1 : 0 */
yading@10 234 "slt %[qc1], $zero, %[qc1] \n\t"
yading@10 235 "slt %[qc2], $zero, %[qc2] \n\t"
yading@10 236 "slt %[qc3], $zero, %[qc3] \n\t"
yading@10 237 "slt %[qc4], $zero, %[qc4] \n\t"
/* t0..t3 = bit patterns of in[i..i+3] */
yading@10 238 "lw $t0, 0(%[in_int]) \n\t"
yading@10 239 "lw $t1, 4(%[in_int]) \n\t"
yading@10 240 "lw $t2, 8(%[in_int]) \n\t"
yading@10 241 "lw $t3, 12(%[in_int]) \n\t"
/* t0..t3 = sign bit (bit 31) of each input */
yading@10 242 "srl $t0, $t0, 31 \n\t"
yading@10 243 "srl $t1, $t1, 31 \n\t"
yading@10 244 "srl $t2, $t2, 31 \n\t"
yading@10 245 "srl $t3, $t3, 31 \n\t"
/* t4..t7 = -qc1..-qc4 */
yading@10 246 "subu $t4, $zero, %[qc1] \n\t"
yading@10 247 "subu $t5, $zero, %[qc2] \n\t"
yading@10 248 "subu $t6, $zero, %[qc3] \n\t"
yading@10 249 "subu $t7, $zero, %[qc4] \n\t"
/* replace qcN by its negation when the matching sign bit is set */
yading@10 250 "movn %[qc1], $t4, $t0 \n\t"
yading@10 251 "movn %[qc2], $t5, $t1 \n\t"
yading@10 252 "movn %[qc3], $t6, $t2 \n\t"
yading@10 253 "movn %[qc4], $t7, $t3 \n\t"
yading@10 254
yading@10 255 ".set pop \n\t"
yading@10 256
yading@10 257 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 258 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 259 : [in_int]"r"(in_int)
yading@10 260 : "t0", "t1", "t2", "t3",
yading@10 261 "t4", "t5", "t6", "t7",
yading@10 262 "memory"
yading@10 263 );
yading@10 264
/*
 * Base-3 index over the four components with each one offset by +1:
 * 27*(qc1+1) + 9*(qc2+1) + 3*(qc3+1) + (qc4+1); the offsets sum to 40.
 */
yading@10 265 curidx = qc1;
yading@10 266 curidx *= 3;
yading@10 267 curidx += qc2;
yading@10 268 curidx *= 3;
yading@10 269 curidx += qc3;
yading@10 270 curidx *= 3;
yading@10 271 curidx += qc4;
yading@10 272 curidx += 40;
yading@10 273
yading@10 274 put_bits(pb, p_bits[curidx], p_codes[curidx]);
yading@10 275 }
yading@10 276 }
yading@10 277
/**
 * Quantize a band four coefficients at a time and write, per group of four,
 * one codeword followed by the sign bits of the non-zero components.
 *
 * Quantized magnitudes are limited to at most 2. The asm also builds `sign`
 * (one bit per non-zero component, first component in the most significant
 * position) and `count` (how many components are greater than zero).
 *
 * @param s         encoder context; s->scoefs is filled by abs_pow34_v()
 * @param pb        bit writer that receives the codewords
 * @param in        input coefficients, consumed in groups of 4
 * @param scaled    value passed in is ignored; it is reassigned to s->scoefs
 * @param size      number of coefficients; the loop advances by 4
 * @param scale_idx index used to fetch Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the tables at index cb-1 are used
 * @param lambda    not used by this function
 * @param uplim     not used by this function
 * @param bits      not used by this function
 */
yading@10 278 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
yading@10 279 PutBitContext *pb, const float *in,
yading@10 280 const float *scaled, int size, int scale_idx,
yading@10 281 int cb, const float lambda, const float uplim,
yading@10 282 int *bits)
yading@10 283 {
yading@10 284 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 285 int i;
yading@10 286 int qc1, qc2, qc3, qc4;
yading@10 287
/* code lengths and code values of the selected codebook */
yading@10 288 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 289 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
yading@10 290
yading@10 291 abs_pow34_v(s->scoefs, in, size);
yading@10 292 scaled = s->scoefs;
yading@10 293 for (i = 0; i < size; i += 4) {
yading@10 294 int curidx, sign, count;
/* the asm below reads the raw bit patterns of in[i..i+3] through this pointer */
yading@10 295 int *in_int = (int *)&in[i];
yading@10 296 uint8_t v_bits;
yading@10 297 unsigned int v_codes;
yading@10 298
/* float-to-int conversion truncates the scaled value plus 0.4054f */
yading@10 299 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 300 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 301 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 302 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 303
yading@10 304 __asm__ volatile (
yading@10 305 ".set push \n\t"
yading@10 306 ".set noreorder \n\t"
yading@10 307
/* t4 = 2 (upper limit), sign = 0 */
yading@10 308 "ori $t4, $zero, 2 \n\t"
yading@10 309 "ori %[sign], $zero, 0 \n\t"
/* tN = (2 < qcN); where true, qcN is replaced by 2 */
yading@10 310 "slt $t0, $t4, %[qc1] \n\t"
yading@10 311 "slt $t1, $t4, %[qc2] \n\t"
yading@10 312 "slt $t2, $t4, %[qc3] \n\t"
yading@10 313 "slt $t3, $t4, %[qc4] \n\t"
yading@10 314 "movn %[qc1], $t4, $t0 \n\t"
yading@10 315 "movn %[qc2], $t4, $t1 \n\t"
yading@10 316 "movn %[qc3], $t4, $t2 \n\t"
yading@10 317 "movn %[qc4], $t4, $t3 \n\t"
/* t0..t3 = bit patterns of in[i..i+3] */
yading@10 318 "lw $t0, 0(%[in_int]) \n\t"
yading@10 319 "lw $t1, 4(%[in_int]) \n\t"
yading@10 320 "lw $t2, 8(%[in_int]) \n\t"
yading@10 321 "lw $t3, 12(%[in_int]) \n\t"
/* tN = 1 when the loaded word is negative as an integer (sign bit set) */
yading@10 322 "slt $t0, $t0, $zero \n\t"
/* if qc1 != 0: sign = sign flag of in[i] */
yading@10 323 "movn %[sign], $t0, %[qc1] \n\t"
yading@10 324 "slt $t1, $t1, $zero \n\t"
yading@10 325 "slt $t2, $t2, $zero \n\t"
yading@10 326 "slt $t3, $t3, $zero \n\t"
/* if qc2 != 0: sign = (sign << 1) | sign flag of in[i+1] */
yading@10 327 "sll $t0, %[sign], 1 \n\t"
yading@10 328 "or $t0, $t0, $t1 \n\t"
yading@10 329 "movn %[sign], $t0, %[qc2] \n\t"
/* t4 = (qc1 > 0), t1 = (qc2 > 0), count = (qc3 > 0) */
yading@10 330 "slt $t4, $zero, %[qc1] \n\t"
yading@10 331 "slt $t1, $zero, %[qc2] \n\t"
yading@10 332 "slt %[count], $zero, %[qc3] \n\t"
/* if qc3 != 0: sign = (sign << 1) | sign flag of in[i+2] */
yading@10 333 "sll $t0, %[sign], 1 \n\t"
yading@10 334 "or $t0, $t0, $t2 \n\t"
yading@10 335 "movn %[sign], $t0, %[qc3] \n\t"
/* t2 = (qc4 > 0); count accumulates the (qcN > 0) flags */
yading@10 336 "slt $t2, $zero, %[qc4] \n\t"
yading@10 337 "addu %[count], %[count], $t4 \n\t"
yading@10 338 "addu %[count], %[count], $t1 \n\t"
/* if qc4 != 0: sign = (sign << 1) | sign flag of in[i+3] */
yading@10 339 "sll $t0, %[sign], 1 \n\t"
yading@10 340 "or $t0, $t0, $t3 \n\t"
yading@10 341 "movn %[sign], $t0, %[qc4] \n\t"
yading@10 342 "addu %[count], %[count], $t2 \n\t"
yading@10 343
yading@10 344 ".set pop \n\t"
yading@10 345
yading@10 346 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 347 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 348 [sign]"=&r"(sign), [count]"=&r"(count)
yading@10 349 : [in_int]"r"(in_int)
yading@10 350 : "t0", "t1", "t2", "t3", "t4",
yading@10 351 "memory"
yading@10 352 );
yading@10 353
/* base-3 index: 27*qc1 + 9*qc2 + 3*qc3 + qc4 */
yading@10 354 curidx = qc1;
yading@10 355 curidx *= 3;
yading@10 356 curidx += qc2;
yading@10 357 curidx *= 3;
yading@10 358 curidx += qc3;
yading@10 359 curidx *= 3;
yading@10 360 curidx += qc4;
yading@10 361
/* codeword shifted up by count, low count bits taken from sign */
yading@10 362 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
yading@10 363 v_bits = p_bits[curidx] + count;
yading@10 364 put_bits(pb, v_bits, v_codes);
yading@10 365 }
yading@10 366 }
yading@10 367
/**
 * Quantize a band four coefficients at a time and write the codewords of the
 * two resulting pairs with a single put_bits() call.
 *
 * Quantized magnitudes are limited to at most 4 and then negated when the
 * sign bit of the matching input float is set.
 *
 * @param s         encoder context; s->scoefs is filled by abs_pow34_v()
 * @param pb        bit writer that receives the codewords
 * @param in        input coefficients, consumed in groups of 4
 * @param scaled    value passed in is ignored; it is reassigned to s->scoefs
 * @param size      number of coefficients; the loop advances by 4
 * @param scale_idx index used to fetch Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the tables at index cb-1 are used
 * @param lambda    not used by this function
 * @param uplim     not used by this function
 * @param bits      not used by this function
 */
yading@10 368 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
yading@10 369 PutBitContext *pb, const float *in,
yading@10 370 const float *scaled, int size, int scale_idx,
yading@10 371 int cb, const float lambda, const float uplim,
yading@10 372 int *bits)
yading@10 373 {
yading@10 374 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 375 int i;
yading@10 376 int qc1, qc2, qc3, qc4;
yading@10 377
/* code lengths and code values of the selected codebook */
yading@10 378 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 379 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
yading@10 380
yading@10 381 abs_pow34_v(s->scoefs, in, size);
yading@10 382 scaled = s->scoefs;
yading@10 383 for (i = 0; i < size; i += 4) {
yading@10 384 int curidx, curidx2;
/* the asm below reads the raw bit patterns of in[i..i+3] through this pointer */
yading@10 385 int *in_int = (int *)&in[i];
yading@10 386 uint8_t v_bits;
yading@10 387 unsigned int v_codes;
yading@10 388
/* float-to-int conversion truncates the scaled value plus 0.4054f */
yading@10 389 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 390 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 391 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 392 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 393
yading@10 394 __asm__ volatile (
yading@10 395 ".set push \n\t"
yading@10 396 ".set noreorder \n\t"
yading@10 397
/* t4 = 4 (upper limit); tN = (4 < qcN); where true, qcN is replaced by 4 */
yading@10 398 "ori $t4, $zero, 4 \n\t"
yading@10 399 "slt $t0, $t4, %[qc1] \n\t"
yading@10 400 "slt $t1, $t4, %[qc2] \n\t"
yading@10 401 "slt $t2, $t4, %[qc3] \n\t"
yading@10 402 "slt $t3, $t4, %[qc4] \n\t"
yading@10 403 "movn %[qc1], $t4, $t0 \n\t"
yading@10 404 "movn %[qc2], $t4, $t1 \n\t"
yading@10 405 "movn %[qc3], $t4, $t2 \n\t"
yading@10 406 "movn %[qc4], $t4, $t3 \n\t"
/* t0..t3 = bit patterns of in[i..i+3] */
yading@10 407 "lw $t0, 0(%[in_int]) \n\t"
yading@10 408 "lw $t1, 4(%[in_int]) \n\t"
yading@10 409 "lw $t2, 8(%[in_int]) \n\t"
yading@10 410 "lw $t3, 12(%[in_int]) \n\t"
/* t0..t3 = sign bit (bit 31) of each input */
yading@10 411 "srl $t0, $t0, 31 \n\t"
yading@10 412 "srl $t1, $t1, 31 \n\t"
yading@10 413 "srl $t2, $t2, 31 \n\t"
yading@10 414 "srl $t3, $t3, 31 \n\t"
/* t4..t7 = -qc1..-qc4 */
yading@10 415 "subu $t4, $zero, %[qc1] \n\t"
yading@10 416 "subu $t5, $zero, %[qc2] \n\t"
yading@10 417 "subu $t6, $zero, %[qc3] \n\t"
yading@10 418 "subu $t7, $zero, %[qc4] \n\t"
/* replace qcN by its negation when the matching sign bit is set */
yading@10 419 "movn %[qc1], $t4, $t0 \n\t"
yading@10 420 "movn %[qc2], $t5, $t1 \n\t"
yading@10 421 "movn %[qc3], $t6, $t2 \n\t"
yading@10 422 "movn %[qc4], $t7, $t3 \n\t"
yading@10 423
yading@10 424 ".set pop \n\t"
yading@10 425
yading@10 426 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 427 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 428 : [in_int]"r"(in_int)
yading@10 429 : "t0", "t1", "t2", "t3",
yading@10 430 "t4", "t5", "t6", "t7",
yading@10 431 "memory"
yading@10 432 );
yading@10 433
/* index of the first pair: 9*(qc1+4) + (qc2+4); the offsets sum to 40 */
yading@10 434 curidx = 9 * qc1;
yading@10 435 curidx += qc2 + 40;
yading@10 436
/* index of the second pair, built the same way from qc3 and qc4 */
yading@10 437 curidx2 = 9 * qc3;
yading@10 438 curidx2 += qc4 + 40;
yading@10 439
/* first codeword in the high bits, second codeword directly below it */
yading@10 440 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
yading@10 441 v_bits = p_bits[curidx] + p_bits[curidx2];
yading@10 442 put_bits(pb, v_bits, v_codes);
yading@10 443 }
yading@10 444 }
yading@10 445
/**
 * Quantize a band four coefficients at a time and write, for each of the two
 * pairs in a group, one codeword followed by that pair's sign bits.
 *
 * Quantized magnitudes are limited to at most 7. For each pair the asm builds
 * a sign word (one bit per non-zero component, first component in the higher
 * position) and a count of components greater than zero.
 *
 * @param s         encoder context; s->scoefs is filled by abs_pow34_v()
 * @param pb        bit writer that receives the codewords
 * @param in        input coefficients, consumed in groups of 4
 * @param scaled    value passed in is ignored; it is reassigned to s->scoefs
 * @param size      number of coefficients; the loop advances by 4
 * @param scale_idx index used to fetch Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the tables at index cb-1 are used
 * @param lambda    not used by this function
 * @param uplim     not used by this function
 * @param bits      not used by this function
 */
yading@10 446 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
yading@10 447 PutBitContext *pb, const float *in,
yading@10 448 const float *scaled, int size, int scale_idx,
yading@10 449 int cb, const float lambda, const float uplim,
yading@10 450 int *bits)
yading@10 451 {
yading@10 452 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 453 int i;
yading@10 454 int qc1, qc2, qc3, qc4;
yading@10 455
/* code lengths and code values of the selected codebook */
yading@10 456 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
yading@10 457 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
yading@10 458
yading@10 459 abs_pow34_v(s->scoefs, in, size);
yading@10 460 scaled = s->scoefs;
yading@10 461 for (i = 0; i < size; i += 4) {
yading@10 462 int curidx, sign1, count1, sign2, count2;
/* the asm below reads the raw bit patterns of in[i..i+3] through this pointer */
yading@10 463 int *in_int = (int *)&in[i];
yading@10 464 uint8_t v_bits;
yading@10 465 unsigned int v_codes;
yading@10 466
/* float-to-int conversion truncates the scaled value plus 0.4054f */
yading@10 467 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 468 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 469 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 470 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 471
yading@10 472 __asm__ volatile (
yading@10 473 ".set push \n\t"
yading@10 474 ".set noreorder \n\t"
yading@10 475
/* t4 = 7 (upper limit), sign1 = sign2 = 0 */
yading@10 476 "ori $t4, $zero, 7 \n\t"
yading@10 477 "ori %[sign1], $zero, 0 \n\t"
yading@10 478 "ori %[sign2], $zero, 0 \n\t"
/* tN = (7 < qcN); where true, qcN is replaced by 7 */
yading@10 479 "slt $t0, $t4, %[qc1] \n\t"
yading@10 480 "slt $t1, $t4, %[qc2] \n\t"
yading@10 481 "slt $t2, $t4, %[qc3] \n\t"
yading@10 482 "slt $t3, $t4, %[qc4] \n\t"
yading@10 483 "movn %[qc1], $t4, $t0 \n\t"
yading@10 484 "movn %[qc2], $t4, $t1 \n\t"
yading@10 485 "movn %[qc3], $t4, $t2 \n\t"
yading@10 486 "movn %[qc4], $t4, $t3 \n\t"
/* t0..t3 = bit patterns of in[i..i+3] */
yading@10 487 "lw $t0, 0(%[in_int]) \n\t"
yading@10 488 "lw $t1, 4(%[in_int]) \n\t"
yading@10 489 "lw $t2, 8(%[in_int]) \n\t"
yading@10 490 "lw $t3, 12(%[in_int]) \n\t"
/* if qc1 != 0: sign1 = sign flag of in[i] */
yading@10 491 "slt $t0, $t0, $zero \n\t"
yading@10 492 "movn %[sign1], $t0, %[qc1] \n\t"
/* if qc3 != 0: sign2 = sign flag of in[i+2] */
yading@10 493 "slt $t2, $t2, $zero \n\t"
yading@10 494 "movn %[sign2], $t2, %[qc3] \n\t"
/* if qc2 != 0: sign1 = (sign1 << 1) | sign flag of in[i+1] */
yading@10 495 "slt $t1, $t1, $zero \n\t"
yading@10 496 "sll $t0, %[sign1], 1 \n\t"
yading@10 497 "or $t0, $t0, $t1 \n\t"
yading@10 498 "movn %[sign1], $t0, %[qc2] \n\t"
/* if qc4 != 0: sign2 = (sign2 << 1) | sign flag of in[i+3] */
yading@10 499 "slt $t3, $t3, $zero \n\t"
yading@10 500 "sll $t0, %[sign2], 1 \n\t"
yading@10 501 "or $t0, $t0, $t3 \n\t"
yading@10 502 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1 = (qc1 > 0) + (qc2 > 0), count2 = (qc3 > 0) + (qc4 > 0) */
yading@10 503 "slt %[count1], $zero, %[qc1] \n\t"
yading@10 504 "slt $t1, $zero, %[qc2] \n\t"
yading@10 505 "slt %[count2], $zero, %[qc3] \n\t"
yading@10 506 "slt $t2, $zero, %[qc4] \n\t"
yading@10 507 "addu %[count1], %[count1], $t1 \n\t"
yading@10 508 "addu %[count2], %[count2], $t2 \n\t"
yading@10 509
yading@10 510 ".set pop \n\t"
yading@10 511
yading@10 512 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 513 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 514 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
yading@10 515 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
yading@10 516 : [in_int]"r"(in_int)
yading@10 517 : "t0", "t1", "t2", "t3", "t4",
yading@10 518 "memory"
yading@10 519 );
yading@10 520
/* first pair: table index 8*qc1 + qc2 */
yading@10 521 curidx = 8 * qc1;
yading@10 522 curidx += qc2;
yading@10 523
/* codeword shifted up by count1 with the sign bits in the low positions */
yading@10 524 v_codes = (p_codes[curidx] << count1) | sign1;
yading@10 525 v_bits = p_bits[curidx] + count1;
yading@10 526 put_bits(pb, v_bits, v_codes);
yading@10 527
/* second pair: table index 8*qc3 + qc4 */
yading@10 528 curidx = 8 * qc3;
yading@10 529 curidx += qc4;
yading@10 530
yading@10 531 v_codes = (p_codes[curidx] << count2) | sign2;
yading@10 532 v_bits = p_bits[curidx] + count2;
yading@10 533 put_bits(pb, v_bits, v_codes);
yading@10 534 }
yading@10 535 }
yading@10 536
/**
 * Quantize a band four coefficients at a time and write, for each of the two
 * pairs in a group, one codeword followed by that pair's sign bits.
 *
 * Quantized magnitudes are limited to at most 12. For each pair the asm
 * builds a sign word (one bit per non-zero component, first component in the
 * higher position) and a count of components greater than zero.
 *
 * @param s         encoder context; s->scoefs is filled by abs_pow34_v()
 * @param pb        bit writer that receives the codewords
 * @param in        input coefficients, consumed in groups of 4
 * @param scaled    value passed in is ignored; it is reassigned to s->scoefs
 * @param size      number of coefficients; the loop advances by 4
 * @param scale_idx index used to fetch Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the tables at index cb-1 are used
 * @param lambda    not used by this function
 * @param uplim     not used by this function
 * @param bits      not used by this function
 */
yading@10 537 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
yading@10 538 PutBitContext *pb, const float *in,
yading@10 539 const float *scaled, int size, int scale_idx,
yading@10 540 int cb, const float lambda, const float uplim,
yading@10 541 int *bits)
yading@10 542 {
yading@10 543 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 544 int i;
yading@10 545 int qc1, qc2, qc3, qc4;
yading@10 546
/* code lengths and code values of the selected codebook */
yading@10 547 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
yading@10 548 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
yading@10 549
yading@10 550 abs_pow34_v(s->scoefs, in, size);
yading@10 551 scaled = s->scoefs;
yading@10 552 for (i = 0; i < size; i += 4) {
yading@10 553 int curidx, sign1, count1, sign2, count2;
/* the asm below reads the raw bit patterns of in[i..i+3] through this pointer */
yading@10 554 int *in_int = (int *)&in[i];
yading@10 555 uint8_t v_bits;
yading@10 556 unsigned int v_codes;
yading@10 557
/* float-to-int conversion truncates the scaled value plus 0.4054f */
yading@10 558 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 559 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 560 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 561 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 562
yading@10 563 __asm__ volatile (
yading@10 564 ".set push \n\t"
yading@10 565 ".set noreorder \n\t"
yading@10 566
/* t4 = 12 (upper limit), sign1 = sign2 = 0 */
yading@10 567 "ori $t4, $zero, 12 \n\t"
yading@10 568 "ori %[sign1], $zero, 0 \n\t"
yading@10 569 "ori %[sign2], $zero, 0 \n\t"
/* tN = (12 < qcN); where true, qcN is replaced by 12 */
yading@10 570 "slt $t0, $t4, %[qc1] \n\t"
yading@10 571 "slt $t1, $t4, %[qc2] \n\t"
yading@10 572 "slt $t2, $t4, %[qc3] \n\t"
yading@10 573 "slt $t3, $t4, %[qc4] \n\t"
yading@10 574 "movn %[qc1], $t4, $t0 \n\t"
yading@10 575 "movn %[qc2], $t4, $t1 \n\t"
yading@10 576 "movn %[qc3], $t4, $t2 \n\t"
yading@10 577 "movn %[qc4], $t4, $t3 \n\t"
/* t0..t3 = bit patterns of in[i..i+3] */
yading@10 578 "lw $t0, 0(%[in_int]) \n\t"
yading@10 579 "lw $t1, 4(%[in_int]) \n\t"
yading@10 580 "lw $t2, 8(%[in_int]) \n\t"
yading@10 581 "lw $t3, 12(%[in_int]) \n\t"
/* if qc1 != 0: sign1 = sign flag of in[i] */
yading@10 582 "slt $t0, $t0, $zero \n\t"
yading@10 583 "movn %[sign1], $t0, %[qc1] \n\t"
/* if qc3 != 0: sign2 = sign flag of in[i+2] */
yading@10 584 "slt $t2, $t2, $zero \n\t"
yading@10 585 "movn %[sign2], $t2, %[qc3] \n\t"
/* if qc2 != 0: sign1 = (sign1 << 1) | sign flag of in[i+1] */
yading@10 586 "slt $t1, $t1, $zero \n\t"
yading@10 587 "sll $t0, %[sign1], 1 \n\t"
yading@10 588 "or $t0, $t0, $t1 \n\t"
yading@10 589 "movn %[sign1], $t0, %[qc2] \n\t"
/* if qc4 != 0: sign2 = (sign2 << 1) | sign flag of in[i+3] */
yading@10 590 "slt $t3, $t3, $zero \n\t"
yading@10 591 "sll $t0, %[sign2], 1 \n\t"
yading@10 592 "or $t0, $t0, $t3 \n\t"
yading@10 593 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1 = (qc1 > 0) + (qc2 > 0), count2 = (qc3 > 0) + (qc4 > 0) */
yading@10 594 "slt %[count1], $zero, %[qc1] \n\t"
yading@10 595 "slt $t1, $zero, %[qc2] \n\t"
yading@10 596 "slt %[count2], $zero, %[qc3] \n\t"
yading@10 597 "slt $t2, $zero, %[qc4] \n\t"
yading@10 598 "addu %[count1], %[count1], $t1 \n\t"
yading@10 599 "addu %[count2], %[count2], $t2 \n\t"
yading@10 600
yading@10 601 ".set pop \n\t"
yading@10 602
yading@10 603 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 604 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 605 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
yading@10 606 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
yading@10 607 : [in_int]"r"(in_int)
yading@10 608 : "t0", "t1", "t2", "t3", "t4",
yading@10 609 "memory"
yading@10 610 );
yading@10 611
/* first pair: table index 13*qc1 + qc2 */
yading@10 612 curidx = 13 * qc1;
yading@10 613 curidx += qc2;
yading@10 614
/* codeword shifted up by count1 with the sign bits in the low positions */
yading@10 615 v_codes = (p_codes[curidx] << count1) | sign1;
yading@10 616 v_bits = p_bits[curidx] + count1;
yading@10 617 put_bits(pb, v_bits, v_codes);
yading@10 618
/* second pair: table index 13*qc3 + qc4 */
yading@10 619 curidx = 13 * qc3;
yading@10 620 curidx += qc4;
yading@10 621
yading@10 622 v_codes = (p_codes[curidx] << count2) | sign2;
yading@10 623 v_bits = p_bits[curidx] + count2;
yading@10 624 put_bits(pb, v_bits, v_codes);
yading@10 625 }
yading@10 626 }
yading@10 627
yading@10 628 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
yading@10 629 PutBitContext *pb, const float *in,
yading@10 630 const float *scaled, int size, int scale_idx,
yading@10 631 int cb, const float lambda, const float uplim,
yading@10 632 int *bits)
yading@10 633 {
yading@10 634 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 635 int i;
yading@10 636 int qc1, qc2, qc3, qc4;
yading@10 637
yading@10 638 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
yading@10 639 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
yading@10 640 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
yading@10 641
yading@10 642 abs_pow34_v(s->scoefs, in, size);
yading@10 643 scaled = s->scoefs;
yading@10 644
yading@10 645 if (cb < 11) {
yading@10 646 for (i = 0; i < size; i += 4) {
yading@10 647 int curidx, curidx2, sign1, count1, sign2, count2;
yading@10 648 int *in_int = (int *)&in[i];
yading@10 649 uint8_t v_bits;
yading@10 650 unsigned int v_codes;
yading@10 651
yading@10 652 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 653 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 654 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 655 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 656
yading@10 657 __asm__ volatile (
yading@10 658 ".set push \n\t"
yading@10 659 ".set noreorder \n\t"
yading@10 660
yading@10 661 "ori $t4, $zero, 16 \n\t"
yading@10 662 "ori %[sign1], $zero, 0 \n\t"
yading@10 663 "ori %[sign2], $zero, 0 \n\t"
yading@10 664 "slt $t0, $t4, %[qc1] \n\t"
yading@10 665 "slt $t1, $t4, %[qc2] \n\t"
yading@10 666 "slt $t2, $t4, %[qc3] \n\t"
yading@10 667 "slt $t3, $t4, %[qc4] \n\t"
yading@10 668 "movn %[qc1], $t4, $t0 \n\t"
yading@10 669 "movn %[qc2], $t4, $t1 \n\t"
yading@10 670 "movn %[qc3], $t4, $t2 \n\t"
yading@10 671 "movn %[qc4], $t4, $t3 \n\t"
yading@10 672 "lw $t0, 0(%[in_int]) \n\t"
yading@10 673 "lw $t1, 4(%[in_int]) \n\t"
yading@10 674 "lw $t2, 8(%[in_int]) \n\t"
yading@10 675 "lw $t3, 12(%[in_int]) \n\t"
yading@10 676 "slt $t0, $t0, $zero \n\t"
yading@10 677 "movn %[sign1], $t0, %[qc1] \n\t"
yading@10 678 "slt $t2, $t2, $zero \n\t"
yading@10 679 "movn %[sign2], $t2, %[qc3] \n\t"
yading@10 680 "slt $t1, $t1, $zero \n\t"
yading@10 681 "sll $t0, %[sign1], 1 \n\t"
yading@10 682 "or $t0, $t0, $t1 \n\t"
yading@10 683 "movn %[sign1], $t0, %[qc2] \n\t"
yading@10 684 "slt $t3, $t3, $zero \n\t"
yading@10 685 "sll $t0, %[sign2], 1 \n\t"
yading@10 686 "or $t0, $t0, $t3 \n\t"
yading@10 687 "movn %[sign2], $t0, %[qc4] \n\t"
yading@10 688 "slt %[count1], $zero, %[qc1] \n\t"
yading@10 689 "slt $t1, $zero, %[qc2] \n\t"
yading@10 690 "slt %[count2], $zero, %[qc3] \n\t"
yading@10 691 "slt $t2, $zero, %[qc4] \n\t"
yading@10 692 "addu %[count1], %[count1], $t1 \n\t"
yading@10 693 "addu %[count2], %[count2], $t2 \n\t"
yading@10 694
yading@10 695 ".set pop \n\t"
yading@10 696
yading@10 697 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 698 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 699 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
yading@10 700 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
yading@10 701 : [in_int]"r"(in_int)
yading@10 702 : "t0", "t1", "t2", "t3", "t4",
yading@10 703 "memory"
yading@10 704 );
yading@10 705
yading@10 706 curidx = 17 * qc1;
yading@10 707 curidx += qc2;
yading@10 708 curidx2 = 17 * qc3;
yading@10 709 curidx2 += qc4;
yading@10 710
yading@10 711 v_codes = (p_codes[curidx] << count1) | sign1;
yading@10 712 v_bits = p_bits[curidx] + count1;
yading@10 713 put_bits(pb, v_bits, v_codes);
yading@10 714
yading@10 715 v_codes = (p_codes[curidx2] << count2) | sign2;
yading@10 716 v_bits = p_bits[curidx2] + count2;
yading@10 717 put_bits(pb, v_bits, v_codes);
yading@10 718 }
yading@10 719 } else {
yading@10 720 for (i = 0; i < size; i += 4) {
yading@10 721 int curidx, curidx2, sign1, count1, sign2, count2;
yading@10 722 int *in_int = (int *)&in[i];
yading@10 723 uint8_t v_bits;
yading@10 724 unsigned int v_codes;
yading@10 725 int c1, c2, c3, c4;
yading@10 726
yading@10 727 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 728 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 729 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 730 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 731
yading@10 732 __asm__ volatile (
yading@10 733 ".set push \n\t"
yading@10 734 ".set noreorder \n\t"
yading@10 735
yading@10 736 "ori $t4, $zero, 16 \n\t"
yading@10 737 "ori %[sign1], $zero, 0 \n\t"
yading@10 738 "ori %[sign2], $zero, 0 \n\t"
yading@10 739 "shll_s.w %[c1], %[qc1], 18 \n\t"
yading@10 740 "shll_s.w %[c2], %[qc2], 18 \n\t"
yading@10 741 "shll_s.w %[c3], %[qc3], 18 \n\t"
yading@10 742 "shll_s.w %[c4], %[qc4], 18 \n\t"
yading@10 743 "srl %[c1], %[c1], 18 \n\t"
yading@10 744 "srl %[c2], %[c2], 18 \n\t"
yading@10 745 "srl %[c3], %[c3], 18 \n\t"
yading@10 746 "srl %[c4], %[c4], 18 \n\t"
yading@10 747 "slt $t0, $t4, %[qc1] \n\t"
yading@10 748 "slt $t1, $t4, %[qc2] \n\t"
yading@10 749 "slt $t2, $t4, %[qc3] \n\t"
yading@10 750 "slt $t3, $t4, %[qc4] \n\t"
yading@10 751 "movn %[qc1], $t4, $t0 \n\t"
yading@10 752 "movn %[qc2], $t4, $t1 \n\t"
yading@10 753 "movn %[qc3], $t4, $t2 \n\t"
yading@10 754 "movn %[qc4], $t4, $t3 \n\t"
yading@10 755 "lw $t0, 0(%[in_int]) \n\t"
yading@10 756 "lw $t1, 4(%[in_int]) \n\t"
yading@10 757 "lw $t2, 8(%[in_int]) \n\t"
yading@10 758 "lw $t3, 12(%[in_int]) \n\t"
yading@10 759 "slt $t0, $t0, $zero \n\t"
yading@10 760 "movn %[sign1], $t0, %[qc1] \n\t"
yading@10 761 "slt $t2, $t2, $zero \n\t"
yading@10 762 "movn %[sign2], $t2, %[qc3] \n\t"
yading@10 763 "slt $t1, $t1, $zero \n\t"
yading@10 764 "sll $t0, %[sign1], 1 \n\t"
yading@10 765 "or $t0, $t0, $t1 \n\t"
yading@10 766 "movn %[sign1], $t0, %[qc2] \n\t"
yading@10 767 "slt $t3, $t3, $zero \n\t"
yading@10 768 "sll $t0, %[sign2], 1 \n\t"
yading@10 769 "or $t0, $t0, $t3 \n\t"
yading@10 770 "movn %[sign2], $t0, %[qc4] \n\t"
yading@10 771 "slt %[count1], $zero, %[qc1] \n\t"
yading@10 772 "slt $t1, $zero, %[qc2] \n\t"
yading@10 773 "slt %[count2], $zero, %[qc3] \n\t"
yading@10 774 "slt $t2, $zero, %[qc4] \n\t"
yading@10 775 "addu %[count1], %[count1], $t1 \n\t"
yading@10 776 "addu %[count2], %[count2], $t2 \n\t"
yading@10 777
yading@10 778 ".set pop \n\t"
yading@10 779
yading@10 780 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 781 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 782 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
yading@10 783 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
yading@10 784 [c1]"=&r"(c1), [c2]"=&r"(c2),
yading@10 785 [c3]"=&r"(c3), [c4]"=&r"(c4)
yading@10 786 : [in_int]"r"(in_int)
yading@10 787 : "t0", "t1", "t2", "t3", "t4",
yading@10 788 "memory"
yading@10 789 );
yading@10 790
yading@10 791 curidx = 17 * qc1;
yading@10 792 curidx += qc2;
yading@10 793
yading@10 794 curidx2 = 17 * qc3;
yading@10 795 curidx2 += qc4;
yading@10 796
yading@10 797 v_codes = (p_codes[curidx] << count1) | sign1;
yading@10 798 v_bits = p_bits[curidx] + count1;
yading@10 799 put_bits(pb, v_bits, v_codes);
yading@10 800
yading@10 801 if (p_vectors[curidx*2 ] == 64.0f) {
yading@10 802 int len = av_log2(c1);
yading@10 803 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
yading@10 804 put_bits(pb, len * 2 - 3, v_codes);
yading@10 805 }
yading@10 806 if (p_vectors[curidx*2+1] == 64.0f) {
yading@10 807 int len = av_log2(c2);
yading@10 808 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
yading@10 809 put_bits(pb, len*2-3, v_codes);
yading@10 810 }
yading@10 811
yading@10 812 v_codes = (p_codes[curidx2] << count2) | sign2;
yading@10 813 v_bits = p_bits[curidx2] + count2;
yading@10 814 put_bits(pb, v_bits, v_codes);
yading@10 815
yading@10 816 if (p_vectors[curidx2*2 ] == 64.0f) {
yading@10 817 int len = av_log2(c3);
yading@10 818 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
yading@10 819 put_bits(pb, len* 2 - 3, v_codes);
yading@10 820 }
yading@10 821 if (p_vectors[curidx2*2+1] == 64.0f) {
yading@10 822 int len = av_log2(c4);
yading@10 823 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
yading@10 824 put_bits(pb, len * 2 - 3, v_codes);
yading@10 825 }
yading@10 826 }
yading@10 827 }
yading@10 828 }
yading@10 829
yading@10 830 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
yading@10 831 PutBitContext *pb, const float *in,
yading@10 832 const float *scaled, int size, int scale_idx,
yading@10 833 int cb, const float lambda, const float uplim,
yading@10 834 int *bits) = {
yading@10 835 NULL,
yading@10 836 quantize_and_encode_band_cost_SQUAD_mips,
yading@10 837 quantize_and_encode_band_cost_SQUAD_mips,
yading@10 838 quantize_and_encode_band_cost_UQUAD_mips,
yading@10 839 quantize_and_encode_band_cost_UQUAD_mips,
yading@10 840 quantize_and_encode_band_cost_SPAIR_mips,
yading@10 841 quantize_and_encode_band_cost_SPAIR_mips,
yading@10 842 quantize_and_encode_band_cost_UPAIR7_mips,
yading@10 843 quantize_and_encode_band_cost_UPAIR7_mips,
yading@10 844 quantize_and_encode_band_cost_UPAIR12_mips,
yading@10 845 quantize_and_encode_band_cost_UPAIR12_mips,
yading@10 846 quantize_and_encode_band_cost_ESC_mips,
yading@10 847 };
yading@10 848
/* Call the quantize-and-encode routine stored at index cb of the table. */
#define quantize_and_encode_band_cost(s, pb, in, scaled, size, scale_idx,   \
                                      cb, lambda, uplim, bits)              \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, scaled, size,          \
                                          scale_idx, cb, lambda, uplim,     \
                                          bits)
yading@10 855
yading@10 856 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
yading@10 857 const float *in, int size, int scale_idx,
yading@10 858 int cb, const float lambda)
yading@10 859 {
yading@10 860 quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
yading@10 861 INFINITY, NULL);
yading@10 862 }
yading@10 863
yading@10 864 /**
yading@10 865 * Functions developed from template function and optimized for getting the number of bits
yading@10 866 */
yading@10 867 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
yading@10 868 PutBitContext *pb, const float *in,
yading@10 869 const float *scaled, int size, int scale_idx,
yading@10 870 int cb, const float lambda, const float uplim,
yading@10 871 int *bits)
yading@10 872 {
yading@10 873 return 0;
yading@10 874 }
yading@10 875
yading@10 876 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
yading@10 877 PutBitContext *pb, const float *in,
yading@10 878 const float *scaled, int size, int scale_idx,
yading@10 879 int cb, const float lambda, const float uplim,
yading@10 880 int *bits)
yading@10 881 {
yading@10 882 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 883 int i;
yading@10 884 int qc1, qc2, qc3, qc4;
yading@10 885 int curbits = 0;
yading@10 886
yading@10 887 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 888
yading@10 889 for (i = 0; i < size; i += 4) {
yading@10 890 int curidx;
yading@10 891 int *in_int = (int *)&in[i];
yading@10 892
yading@10 893 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 894 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 895 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 896 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 897
yading@10 898 __asm__ volatile (
yading@10 899 ".set push \n\t"
yading@10 900 ".set noreorder \n\t"
yading@10 901
yading@10 902 "slt %[qc1], $zero, %[qc1] \n\t"
yading@10 903 "slt %[qc2], $zero, %[qc2] \n\t"
yading@10 904 "slt %[qc3], $zero, %[qc3] \n\t"
yading@10 905 "slt %[qc4], $zero, %[qc4] \n\t"
yading@10 906 "lw $t0, 0(%[in_int]) \n\t"
yading@10 907 "lw $t1, 4(%[in_int]) \n\t"
yading@10 908 "lw $t2, 8(%[in_int]) \n\t"
yading@10 909 "lw $t3, 12(%[in_int]) \n\t"
yading@10 910 "srl $t0, $t0, 31 \n\t"
yading@10 911 "srl $t1, $t1, 31 \n\t"
yading@10 912 "srl $t2, $t2, 31 \n\t"
yading@10 913 "srl $t3, $t3, 31 \n\t"
yading@10 914 "subu $t4, $zero, %[qc1] \n\t"
yading@10 915 "subu $t5, $zero, %[qc2] \n\t"
yading@10 916 "subu $t6, $zero, %[qc3] \n\t"
yading@10 917 "subu $t7, $zero, %[qc4] \n\t"
yading@10 918 "movn %[qc1], $t4, $t0 \n\t"
yading@10 919 "movn %[qc2], $t5, $t1 \n\t"
yading@10 920 "movn %[qc3], $t6, $t2 \n\t"
yading@10 921 "movn %[qc4], $t7, $t3 \n\t"
yading@10 922
yading@10 923 ".set pop \n\t"
yading@10 924
yading@10 925 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 926 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 927 : [in_int]"r"(in_int)
yading@10 928 : "t0", "t1", "t2", "t3",
yading@10 929 "t4", "t5", "t6", "t7",
yading@10 930 "memory"
yading@10 931 );
yading@10 932
yading@10 933 curidx = qc1;
yading@10 934 curidx *= 3;
yading@10 935 curidx += qc2;
yading@10 936 curidx *= 3;
yading@10 937 curidx += qc3;
yading@10 938 curidx *= 3;
yading@10 939 curidx += qc4;
yading@10 940 curidx += 40;
yading@10 941
yading@10 942 curbits += p_bits[curidx];
yading@10 943 }
yading@10 944 return curbits;
yading@10 945 }
yading@10 946
yading@10 947 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
yading@10 948 PutBitContext *pb, const float *in,
yading@10 949 const float *scaled, int size, int scale_idx,
yading@10 950 int cb, const float lambda, const float uplim,
yading@10 951 int *bits)
yading@10 952 {
yading@10 953 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 954 int i;
yading@10 955 int curbits = 0;
yading@10 956 int qc1, qc2, qc3, qc4;
yading@10 957
yading@10 958 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 959
yading@10 960 for (i = 0; i < size; i += 4) {
yading@10 961 int curidx;
yading@10 962
yading@10 963 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 964 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 965 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 966 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 967
yading@10 968 __asm__ volatile (
yading@10 969 ".set push \n\t"
yading@10 970 ".set noreorder \n\t"
yading@10 971
yading@10 972 "ori $t4, $zero, 2 \n\t"
yading@10 973 "slt $t0, $t4, %[qc1] \n\t"
yading@10 974 "slt $t1, $t4, %[qc2] \n\t"
yading@10 975 "slt $t2, $t4, %[qc3] \n\t"
yading@10 976 "slt $t3, $t4, %[qc4] \n\t"
yading@10 977 "movn %[qc1], $t4, $t0 \n\t"
yading@10 978 "movn %[qc2], $t4, $t1 \n\t"
yading@10 979 "movn %[qc3], $t4, $t2 \n\t"
yading@10 980 "movn %[qc4], $t4, $t3 \n\t"
yading@10 981
yading@10 982 ".set pop \n\t"
yading@10 983
yading@10 984 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 985 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 986 :
yading@10 987 : "t0", "t1", "t2", "t3", "t4"
yading@10 988 );
yading@10 989
yading@10 990 curidx = qc1;
yading@10 991 curidx *= 3;
yading@10 992 curidx += qc2;
yading@10 993 curidx *= 3;
yading@10 994 curidx += qc3;
yading@10 995 curidx *= 3;
yading@10 996 curidx += qc4;
yading@10 997
yading@10 998 curbits += p_bits[curidx];
yading@10 999 curbits += uquad_sign_bits[curidx];
yading@10 1000 }
yading@10 1001 return curbits;
yading@10 1002 }
yading@10 1003
yading@10 1004 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
yading@10 1005 PutBitContext *pb, const float *in,
yading@10 1006 const float *scaled, int size, int scale_idx,
yading@10 1007 int cb, const float lambda, const float uplim,
yading@10 1008 int *bits)
yading@10 1009 {
yading@10 1010 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1011 int i;
yading@10 1012 int qc1, qc2, qc3, qc4;
yading@10 1013 int curbits = 0;
yading@10 1014
yading@10 1015 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
yading@10 1016
yading@10 1017 for (i = 0; i < size; i += 4) {
yading@10 1018 int curidx, curidx2;
yading@10 1019 int *in_int = (int *)&in[i];
yading@10 1020
yading@10 1021 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1022 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1023 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1024 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1025
yading@10 1026 __asm__ volatile (
yading@10 1027 ".set push \n\t"
yading@10 1028 ".set noreorder \n\t"
yading@10 1029
yading@10 1030 "ori $t4, $zero, 4 \n\t"
yading@10 1031 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1032 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1033 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1034 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1035 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1036 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1037 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1038 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1039 "lw $t0, 0(%[in_int]) \n\t"
yading@10 1040 "lw $t1, 4(%[in_int]) \n\t"
yading@10 1041 "lw $t2, 8(%[in_int]) \n\t"
yading@10 1042 "lw $t3, 12(%[in_int]) \n\t"
yading@10 1043 "srl $t0, $t0, 31 \n\t"
yading@10 1044 "srl $t1, $t1, 31 \n\t"
yading@10 1045 "srl $t2, $t2, 31 \n\t"
yading@10 1046 "srl $t3, $t3, 31 \n\t"
yading@10 1047 "subu $t4, $zero, %[qc1] \n\t"
yading@10 1048 "subu $t5, $zero, %[qc2] \n\t"
yading@10 1049 "subu $t6, $zero, %[qc3] \n\t"
yading@10 1050 "subu $t7, $zero, %[qc4] \n\t"
yading@10 1051 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1052 "movn %[qc2], $t5, $t1 \n\t"
yading@10 1053 "movn %[qc3], $t6, $t2 \n\t"
yading@10 1054 "movn %[qc4], $t7, $t3 \n\t"
yading@10 1055
yading@10 1056 ".set pop \n\t"
yading@10 1057
yading@10 1058 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1059 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 1060 : [in_int]"r"(in_int)
yading@10 1061 : "t0", "t1", "t2", "t3",
yading@10 1062 "t4", "t5", "t6", "t7",
yading@10 1063 "memory"
yading@10 1064 );
yading@10 1065
yading@10 1066 curidx = 9 * qc1;
yading@10 1067 curidx += qc2 + 40;
yading@10 1068
yading@10 1069 curidx2 = 9 * qc3;
yading@10 1070 curidx2 += qc4 + 40;
yading@10 1071
yading@10 1072 curbits += p_bits[curidx] + p_bits[curidx2];
yading@10 1073 }
yading@10 1074 return curbits;
yading@10 1075 }
yading@10 1076
yading@10 1077 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
yading@10 1078 PutBitContext *pb, const float *in,
yading@10 1079 const float *scaled, int size, int scale_idx,
yading@10 1080 int cb, const float lambda, const float uplim,
yading@10 1081 int *bits)
yading@10 1082 {
yading@10 1083 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1084 int i;
yading@10 1085 int qc1, qc2, qc3, qc4;
yading@10 1086 int curbits = 0;
yading@10 1087
yading@10 1088 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 1089
yading@10 1090 for (i = 0; i < size; i += 4) {
yading@10 1091 int curidx, curidx2;
yading@10 1092
yading@10 1093 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1094 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1095 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1096 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1097
yading@10 1098 __asm__ volatile (
yading@10 1099 ".set push \n\t"
yading@10 1100 ".set noreorder \n\t"
yading@10 1101
yading@10 1102 "ori $t4, $zero, 7 \n\t"
yading@10 1103 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1104 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1105 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1106 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1107 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1108 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1109 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1110 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1111
yading@10 1112 ".set pop \n\t"
yading@10 1113
yading@10 1114 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1115 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 1116 :
yading@10 1117 : "t0", "t1", "t2", "t3", "t4"
yading@10 1118 );
yading@10 1119
yading@10 1120 curidx = 8 * qc1;
yading@10 1121 curidx += qc2;
yading@10 1122
yading@10 1123 curidx2 = 8 * qc3;
yading@10 1124 curidx2 += qc4;
yading@10 1125
yading@10 1126 curbits += p_bits[curidx] +
yading@10 1127 upair7_sign_bits[curidx] +
yading@10 1128 p_bits[curidx2] +
yading@10 1129 upair7_sign_bits[curidx2];
yading@10 1130 }
yading@10 1131 return curbits;
yading@10 1132 }
yading@10 1133
yading@10 1134 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
yading@10 1135 PutBitContext *pb, const float *in,
yading@10 1136 const float *scaled, int size, int scale_idx,
yading@10 1137 int cb, const float lambda, const float uplim,
yading@10 1138 int *bits)
yading@10 1139 {
yading@10 1140 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1141 int i;
yading@10 1142 int qc1, qc2, qc3, qc4;
yading@10 1143 int curbits = 0;
yading@10 1144
yading@10 1145 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 1146
yading@10 1147 for (i = 0; i < size; i += 4) {
yading@10 1148 int curidx, curidx2;
yading@10 1149
yading@10 1150 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1151 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1152 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1153 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1154
yading@10 1155 __asm__ volatile (
yading@10 1156 ".set push \n\t"
yading@10 1157 ".set noreorder \n\t"
yading@10 1158
yading@10 1159 "ori $t4, $zero, 12 \n\t"
yading@10 1160 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1161 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1162 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1163 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1164 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1165 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1166 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1167 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1168
yading@10 1169 ".set pop \n\t"
yading@10 1170
yading@10 1171 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1172 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 1173 :
yading@10 1174 : "t0", "t1", "t2", "t3", "t4"
yading@10 1175 );
yading@10 1176
yading@10 1177 curidx = 13 * qc1;
yading@10 1178 curidx += qc2;
yading@10 1179
yading@10 1180 curidx2 = 13 * qc3;
yading@10 1181 curidx2 += qc4;
yading@10 1182
yading@10 1183 curbits += p_bits[curidx] +
yading@10 1184 p_bits[curidx2] +
yading@10 1185 upair12_sign_bits[curidx] +
yading@10 1186 upair12_sign_bits[curidx2];
yading@10 1187 }
yading@10 1188 return curbits;
yading@10 1189 }
yading@10 1190
yading@10 1191 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
yading@10 1192 PutBitContext *pb, const float *in,
yading@10 1193 const float *scaled, int size, int scale_idx,
yading@10 1194 int cb, const float lambda, const float uplim,
yading@10 1195 int *bits)
yading@10 1196 {
yading@10 1197 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1198 int i;
yading@10 1199 int qc1, qc2, qc3, qc4;
yading@10 1200 int curbits = 0;
yading@10 1201
yading@10 1202 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
yading@10 1203
yading@10 1204 for (i = 0; i < size; i += 4) {
yading@10 1205 int curidx, curidx2;
yading@10 1206 int cond0, cond1, cond2, cond3;
yading@10 1207 int c1, c2, c3, c4;
yading@10 1208
yading@10 1209 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1210 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1211 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1212 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1213
yading@10 1214 __asm__ volatile (
yading@10 1215 ".set push \n\t"
yading@10 1216 ".set noreorder \n\t"
yading@10 1217
yading@10 1218 "ori $t4, $zero, 15 \n\t"
yading@10 1219 "ori $t5, $zero, 16 \n\t"
yading@10 1220 "shll_s.w %[c1], %[qc1], 18 \n\t"
yading@10 1221 "shll_s.w %[c2], %[qc2], 18 \n\t"
yading@10 1222 "shll_s.w %[c3], %[qc3], 18 \n\t"
yading@10 1223 "shll_s.w %[c4], %[qc4], 18 \n\t"
yading@10 1224 "srl %[c1], %[c1], 18 \n\t"
yading@10 1225 "srl %[c2], %[c2], 18 \n\t"
yading@10 1226 "srl %[c3], %[c3], 18 \n\t"
yading@10 1227 "srl %[c4], %[c4], 18 \n\t"
yading@10 1228 "slt %[cond0], $t4, %[qc1] \n\t"
yading@10 1229 "slt %[cond1], $t4, %[qc2] \n\t"
yading@10 1230 "slt %[cond2], $t4, %[qc3] \n\t"
yading@10 1231 "slt %[cond3], $t4, %[qc4] \n\t"
yading@10 1232 "movn %[qc1], $t5, %[cond0] \n\t"
yading@10 1233 "movn %[qc2], $t5, %[cond1] \n\t"
yading@10 1234 "movn %[qc3], $t5, %[cond2] \n\t"
yading@10 1235 "movn %[qc4], $t5, %[cond3] \n\t"
yading@10 1236 "ori $t5, $zero, 31 \n\t"
yading@10 1237 "clz %[c1], %[c1] \n\t"
yading@10 1238 "clz %[c2], %[c2] \n\t"
yading@10 1239 "clz %[c3], %[c3] \n\t"
yading@10 1240 "clz %[c4], %[c4] \n\t"
yading@10 1241 "subu %[c1], $t5, %[c1] \n\t"
yading@10 1242 "subu %[c2], $t5, %[c2] \n\t"
yading@10 1243 "subu %[c3], $t5, %[c3] \n\t"
yading@10 1244 "subu %[c4], $t5, %[c4] \n\t"
yading@10 1245 "sll %[c1], %[c1], 1 \n\t"
yading@10 1246 "sll %[c2], %[c2], 1 \n\t"
yading@10 1247 "sll %[c3], %[c3], 1 \n\t"
yading@10 1248 "sll %[c4], %[c4], 1 \n\t"
yading@10 1249 "addiu %[c1], %[c1], -3 \n\t"
yading@10 1250 "addiu %[c2], %[c2], -3 \n\t"
yading@10 1251 "addiu %[c3], %[c3], -3 \n\t"
yading@10 1252 "addiu %[c4], %[c4], -3 \n\t"
yading@10 1253 "subu %[cond0], $zero, %[cond0] \n\t"
yading@10 1254 "subu %[cond1], $zero, %[cond1] \n\t"
yading@10 1255 "subu %[cond2], $zero, %[cond2] \n\t"
yading@10 1256 "subu %[cond3], $zero, %[cond3] \n\t"
yading@10 1257 "and %[c1], %[c1], %[cond0] \n\t"
yading@10 1258 "and %[c2], %[c2], %[cond1] \n\t"
yading@10 1259 "and %[c3], %[c3], %[cond2] \n\t"
yading@10 1260 "and %[c4], %[c4], %[cond3] \n\t"
yading@10 1261
yading@10 1262 ".set pop \n\t"
yading@10 1263
yading@10 1264 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1265 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 1266 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
yading@10 1267 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
yading@10 1268 [c1]"=&r"(c1), [c2]"=&r"(c2),
yading@10 1269 [c3]"=&r"(c3), [c4]"=&r"(c4)
yading@10 1270 :
yading@10 1271 : "t4", "t5"
yading@10 1272 );
yading@10 1273
yading@10 1274 curidx = 17 * qc1;
yading@10 1275 curidx += qc2;
yading@10 1276
yading@10 1277 curidx2 = 17 * qc3;
yading@10 1278 curidx2 += qc4;
yading@10 1279
yading@10 1280 curbits += p_bits[curidx];
yading@10 1281 curbits += esc_sign_bits[curidx];
yading@10 1282 curbits += p_bits[curidx2];
yading@10 1283 curbits += esc_sign_bits[curidx2];
yading@10 1284
yading@10 1285 curbits += c1;
yading@10 1286 curbits += c2;
yading@10 1287 curbits += c3;
yading@10 1288 curbits += c4;
yading@10 1289 }
yading@10 1290 return curbits;
yading@10 1291 }
yading@10 1292
yading@10 1293 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
yading@10 1294 PutBitContext *pb, const float *in,
yading@10 1295 const float *scaled, int size, int scale_idx,
yading@10 1296 int cb, const float lambda, const float uplim,
yading@10 1297 int *bits) = {
yading@10 1298 get_band_numbits_ZERO_mips,
yading@10 1299 get_band_numbits_SQUAD_mips,
yading@10 1300 get_band_numbits_SQUAD_mips,
yading@10 1301 get_band_numbits_UQUAD_mips,
yading@10 1302 get_band_numbits_UQUAD_mips,
yading@10 1303 get_band_numbits_SPAIR_mips,
yading@10 1304 get_band_numbits_SPAIR_mips,
yading@10 1305 get_band_numbits_UPAIR7_mips,
yading@10 1306 get_band_numbits_UPAIR7_mips,
yading@10 1307 get_band_numbits_UPAIR12_mips,
yading@10 1308 get_band_numbits_UPAIR12_mips,
yading@10 1309 get_band_numbits_ESC_mips,
yading@10 1310 };
yading@10 1311
/* Call the bit-counting routine stored at index cb of the table. */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx,                \
                         cb, lambda, uplim, bits)                           \
    get_band_numbits_arr[cb](s, pb, in, scaled, size,                       \
                             scale_idx, cb, lambda, uplim,                  \
                             bits)
yading@10 1318
/**
 * Return the bit count of one band for codebook cb.
 *
 * Thin wrapper that calls the routine selected by cb through
 * get_band_numbits(), passing NULL as the bitstream writer; all other
 * arguments are forwarded unchanged.
 *
 * @return value returned by the selected bit-counting routine
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits)
{
    const float numbits = get_band_numbits(s, NULL, in, scaled, size, scale_idx,
                                           cb, lambda, uplim, bits);

    return numbits;
}
yading@10 1326
yading@10 1327 /**
yading@10 1328 * Functions developed from template function and optimized for getting the band cost
yading@10 1329 */
yading@10 1330 #if HAVE_MIPSFPU
yading@10 1331 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
yading@10 1332 PutBitContext *pb, const float *in,
yading@10 1333 const float *scaled, int size, int scale_idx,
yading@10 1334 int cb, const float lambda, const float uplim,
yading@10 1335 int *bits)
yading@10 1336 {
yading@10 1337 int i;
yading@10 1338 float cost = 0;
yading@10 1339
yading@10 1340 for (i = 0; i < size; i += 4) {
yading@10 1341 cost += in[i ] * in[i ];
yading@10 1342 cost += in[i+1] * in[i+1];
yading@10 1343 cost += in[i+2] * in[i+2];
yading@10 1344 cost += in[i+3] * in[i+3];
yading@10 1345 }
yading@10 1346 if (bits)
yading@10 1347 *bits = 0;
yading@10 1348 return cost * lambda;
yading@10 1349 }
yading@10 1350
yading@10 1351 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
yading@10 1352 PutBitContext *pb, const float *in,
yading@10 1353 const float *scaled, int size, int scale_idx,
yading@10 1354 int cb, const float lambda, const float uplim,
yading@10 1355 int *bits)
yading@10 1356 {
yading@10 1357 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1358 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
yading@10 1359 int i;
yading@10 1360 float cost = 0;
yading@10 1361 int qc1, qc2, qc3, qc4;
yading@10 1362 int curbits = 0;
yading@10 1363
yading@10 1364 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 1365 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
yading@10 1366
yading@10 1367 for (i = 0; i < size; i += 4) {
yading@10 1368 const float *vec;
yading@10 1369 int curidx;
yading@10 1370 int *in_int = (int *)&in[i];
yading@10 1371 float *in_pos = (float *)&in[i];
yading@10 1372 float di0, di1, di2, di3;
yading@10 1373
yading@10 1374 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1375 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1376 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1377 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1378
yading@10 1379 __asm__ volatile (
yading@10 1380 ".set push \n\t"
yading@10 1381 ".set noreorder \n\t"
yading@10 1382
yading@10 1383 "slt %[qc1], $zero, %[qc1] \n\t"
yading@10 1384 "slt %[qc2], $zero, %[qc2] \n\t"
yading@10 1385 "slt %[qc3], $zero, %[qc3] \n\t"
yading@10 1386 "slt %[qc4], $zero, %[qc4] \n\t"
yading@10 1387 "lw $t0, 0(%[in_int]) \n\t"
yading@10 1388 "lw $t1, 4(%[in_int]) \n\t"
yading@10 1389 "lw $t2, 8(%[in_int]) \n\t"
yading@10 1390 "lw $t3, 12(%[in_int]) \n\t"
yading@10 1391 "srl $t0, $t0, 31 \n\t"
yading@10 1392 "srl $t1, $t1, 31 \n\t"
yading@10 1393 "srl $t2, $t2, 31 \n\t"
yading@10 1394 "srl $t3, $t3, 31 \n\t"
yading@10 1395 "subu $t4, $zero, %[qc1] \n\t"
yading@10 1396 "subu $t5, $zero, %[qc2] \n\t"
yading@10 1397 "subu $t6, $zero, %[qc3] \n\t"
yading@10 1398 "subu $t7, $zero, %[qc4] \n\t"
yading@10 1399 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1400 "movn %[qc2], $t5, $t1 \n\t"
yading@10 1401 "movn %[qc3], $t6, $t2 \n\t"
yading@10 1402 "movn %[qc4], $t7, $t3 \n\t"
yading@10 1403
yading@10 1404 ".set pop \n\t"
yading@10 1405
yading@10 1406 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1407 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 1408 : [in_int]"r"(in_int)
yading@10 1409 : "t0", "t1", "t2", "t3",
yading@10 1410 "t4", "t5", "t6", "t7",
yading@10 1411 "memory"
yading@10 1412 );
yading@10 1413
yading@10 1414 curidx = qc1;
yading@10 1415 curidx *= 3;
yading@10 1416 curidx += qc2;
yading@10 1417 curidx *= 3;
yading@10 1418 curidx += qc3;
yading@10 1419 curidx *= 3;
yading@10 1420 curidx += qc4;
yading@10 1421 curidx += 40;
yading@10 1422
yading@10 1423 curbits += p_bits[curidx];
yading@10 1424 vec = &p_codes[curidx*4];
yading@10 1425
yading@10 1426 __asm__ volatile (
yading@10 1427 ".set push \n\t"
yading@10 1428 ".set noreorder \n\t"
yading@10 1429
yading@10 1430 "lwc1 $f0, 0(%[in_pos]) \n\t"
yading@10 1431 "lwc1 $f1, 0(%[vec]) \n\t"
yading@10 1432 "lwc1 $f2, 4(%[in_pos]) \n\t"
yading@10 1433 "lwc1 $f3, 4(%[vec]) \n\t"
yading@10 1434 "lwc1 $f4, 8(%[in_pos]) \n\t"
yading@10 1435 "lwc1 $f5, 8(%[vec]) \n\t"
yading@10 1436 "lwc1 $f6, 12(%[in_pos]) \n\t"
yading@10 1437 "lwc1 $f7, 12(%[vec]) \n\t"
yading@10 1438 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
yading@10 1439 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
yading@10 1440 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
yading@10 1441 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
yading@10 1442
yading@10 1443 ".set pop \n\t"
yading@10 1444
yading@10 1445 : [di0]"=&f"(di0), [di1]"=&f"(di1),
yading@10 1446 [di2]"=&f"(di2), [di3]"=&f"(di3)
yading@10 1447 : [in_pos]"r"(in_pos), [vec]"r"(vec),
yading@10 1448 [IQ]"f"(IQ)
yading@10 1449 : "$f0", "$f1", "$f2", "$f3",
yading@10 1450 "$f4", "$f5", "$f6", "$f7",
yading@10 1451 "memory"
yading@10 1452 );
yading@10 1453
yading@10 1454 cost += di0 * di0 + di1 * di1
yading@10 1455 + di2 * di2 + di3 * di3;
yading@10 1456 }
yading@10 1457
yading@10 1458 if (bits)
yading@10 1459 *bits = curbits;
yading@10 1460 return cost * lambda + curbits;
yading@10 1461 }
yading@10 1462
yading@10 1463 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
yading@10 1464 PutBitContext *pb, const float *in,
yading@10 1465 const float *scaled, int size, int scale_idx,
yading@10 1466 int cb, const float lambda, const float uplim,
yading@10 1467 int *bits)
yading@10 1468 {
yading@10 1469 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1470 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
yading@10 1471 int i;
yading@10 1472 float cost = 0;
yading@10 1473 int curbits = 0;
yading@10 1474 int qc1, qc2, qc3, qc4;
yading@10 1475
yading@10 1476 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
yading@10 1477 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
yading@10 1478
yading@10 1479 for (i = 0; i < size; i += 4) {
yading@10 1480 const float *vec;
yading@10 1481 int curidx;
yading@10 1482 float *in_pos = (float *)&in[i];
yading@10 1483 float di0, di1, di2, di3;
yading@10 1484
yading@10 1485 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1486 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1487 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1488 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1489
yading@10 1490 __asm__ volatile (
yading@10 1491 ".set push \n\t"
yading@10 1492 ".set noreorder \n\t"
yading@10 1493
yading@10 1494 "ori $t4, $zero, 2 \n\t"
yading@10 1495 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1496 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1497 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1498 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1499 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1500 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1501 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1502 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1503
yading@10 1504 ".set pop \n\t"
yading@10 1505
yading@10 1506 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1507 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 1508 :
yading@10 1509 : "t0", "t1", "t2", "t3", "t4"
yading@10 1510 );
yading@10 1511
yading@10 1512 curidx = qc1;
yading@10 1513 curidx *= 3;
yading@10 1514 curidx += qc2;
yading@10 1515 curidx *= 3;
yading@10 1516 curidx += qc3;
yading@10 1517 curidx *= 3;
yading@10 1518 curidx += qc4;
yading@10 1519
yading@10 1520 curbits += p_bits[curidx];
yading@10 1521 curbits += uquad_sign_bits[curidx];
yading@10 1522 vec = &p_codes[curidx*4];
yading@10 1523
yading@10 1524 __asm__ volatile (
yading@10 1525 ".set push \n\t"
yading@10 1526 ".set noreorder \n\t"
yading@10 1527
yading@10 1528 "lwc1 %[di0], 0(%[in_pos]) \n\t"
yading@10 1529 "lwc1 %[di1], 4(%[in_pos]) \n\t"
yading@10 1530 "lwc1 %[di2], 8(%[in_pos]) \n\t"
yading@10 1531 "lwc1 %[di3], 12(%[in_pos]) \n\t"
yading@10 1532 "abs.s %[di0], %[di0] \n\t"
yading@10 1533 "abs.s %[di1], %[di1] \n\t"
yading@10 1534 "abs.s %[di2], %[di2] \n\t"
yading@10 1535 "abs.s %[di3], %[di3] \n\t"
yading@10 1536 "lwc1 $f0, 0(%[vec]) \n\t"
yading@10 1537 "lwc1 $f1, 4(%[vec]) \n\t"
yading@10 1538 "lwc1 $f2, 8(%[vec]) \n\t"
yading@10 1539 "lwc1 $f3, 12(%[vec]) \n\t"
yading@10 1540 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
yading@10 1541 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
yading@10 1542 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
yading@10 1543 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
yading@10 1544
yading@10 1545 ".set pop \n\t"
yading@10 1546
yading@10 1547 : [di0]"=&f"(di0), [di1]"=&f"(di1),
yading@10 1548 [di2]"=&f"(di2), [di3]"=&f"(di3)
yading@10 1549 : [in_pos]"r"(in_pos), [vec]"r"(vec),
yading@10 1550 [IQ]"f"(IQ)
yading@10 1551 : "$f0", "$f1", "$f2", "$f3",
yading@10 1552 "memory"
yading@10 1553 );
yading@10 1554
yading@10 1555 cost += di0 * di0 + di1 * di1
yading@10 1556 + di2 * di2 + di3 * di3;
yading@10 1557 }
yading@10 1558
yading@10 1559 if (bits)
yading@10 1560 *bits = curbits;
yading@10 1561 return cost * lambda + curbits;
yading@10 1562 }
yading@10 1563
/**
 * Rate/distortion cost of one band coded with a signed pair codebook.
 *
 * Each loop iteration handles four coefficients, i.e. two codebook pairs
 * (in[i], in[i+1]) and (in[i+2], in[i+3]); the loop always touches
 * elements i..i+3, so size is expected to be a multiple of 4.
 *
 * @param s         not referenced in this function
 * @param pb        not referenced in this function
 * @param in        input coefficients; supply the sign and the reference
 *                  value for the distortion
 * @param scaled    values multiplied by Q34 and truncated to get the
 *                  quantized magnitudes (presumably the abs_pow34_v()
 *                  output for in -- confirm against callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index, used to look up Q34 and IQ
 * @param cb        codebook number; the bit and vector tables are taken
 *                  at index cb-1
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     not referenced in this function
 * @param bits      if non-NULL, receives the accumulated bit count
 * @return cost * lambda + curbits
 */
yading@10 1564 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
yading@10 1565 PutBitContext *pb, const float *in,
yading@10 1566 const float *scaled, int size, int scale_idx,
yading@10 1567 int cb, const float lambda, const float uplim,
yading@10 1568 int *bits)
yading@10 1569 {
/* Q34 scales scaled[] before truncation; IQ scales codebook entries back. */
yading@10 1570 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1571 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
yading@10 1572 int i;
yading@10 1573 float cost = 0;
yading@10 1574 int qc1, qc2, qc3, qc4;
yading@10 1575 int curbits = 0;
yading@10 1576
yading@10 1577 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 1578 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
yading@10 1579
yading@10 1580 for (i = 0; i < size; i += 4) {
yading@10 1581 const float *vec, *vec2;
yading@10 1582 int curidx, curidx2;
/* in_int and in_pos alias the same four inputs: as raw words and as floats. */
yading@10 1583 int *in_int = (int *)&in[i];
yading@10 1584 float *in_pos = (float *)&in[i];
yading@10 1585 float di0, di1, di2, di3;
yading@10 1586
/* Scale, add the 0.4054 bias and convert to int (float-to-int truncation). */
yading@10 1587 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1588 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1589 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1590 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1591
/*
 * Step 1: clamp every qc to at most 4 (slt/movn against $t4 = 4).
 * Step 2: load the raw words of in[i..i+3], isolate their top bit with
 *         srl 31 and, where it is set, replace qc by 0 - qc, so each
 *         qc ends up carrying the sign of its input coefficient.
 */
yading@10 1592 __asm__ volatile (
yading@10 1593 ".set push \n\t"
yading@10 1594 ".set noreorder \n\t"
yading@10 1595
yading@10 1596 "ori $t4, $zero, 4 \n\t"
yading@10 1597 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1598 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1599 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1600 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1601 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1602 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1603 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1604 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1605 "lw $t0, 0(%[in_int]) \n\t"
yading@10 1606 "lw $t1, 4(%[in_int]) \n\t"
yading@10 1607 "lw $t2, 8(%[in_int]) \n\t"
yading@10 1608 "lw $t3, 12(%[in_int]) \n\t"
yading@10 1609 "srl $t0, $t0, 31 \n\t"
yading@10 1610 "srl $t1, $t1, 31 \n\t"
yading@10 1611 "srl $t2, $t2, 31 \n\t"
yading@10 1612 "srl $t3, $t3, 31 \n\t"
yading@10 1613 "subu $t4, $zero, %[qc1] \n\t"
yading@10 1614 "subu $t5, $zero, %[qc2] \n\t"
yading@10 1615 "subu $t6, $zero, %[qc3] \n\t"
yading@10 1616 "subu $t7, $zero, %[qc4] \n\t"
yading@10 1617 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1618 "movn %[qc2], $t5, $t1 \n\t"
yading@10 1619 "movn %[qc3], $t6, $t2 \n\t"
yading@10 1620 "movn %[qc4], $t7, $t3 \n\t"
yading@10 1621
yading@10 1622 ".set pop \n\t"
yading@10 1623
yading@10 1624 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1625 [qc3]"+r"(qc3), [qc4]"+r"(qc4)
yading@10 1626 : [in_int]"r"(in_int)
yading@10 1627 : "t0", "t1", "t2", "t3",
yading@10 1628 "t4", "t5", "t6", "t7",
yading@10 1629 "memory"
yading@10 1630 );
yading@10 1631
/* Pair index 9 * first + second + 40: for qc values in -4..4 this is 0..80. */
yading@10 1632 curidx = 9 * qc1;
yading@10 1633 curidx += qc2 + 40;
yading@10 1634
yading@10 1635 curidx2 = 9 * qc3;
yading@10 1636 curidx2 += qc4 + 40;
yading@10 1637
yading@10 1638 curbits += p_bits[curidx];
yading@10 1639 curbits += p_bits[curidx2];
yading@10 1640
/* Each codebook entry holds two floats, hence the index * 2. */
yading@10 1641 vec = &p_codes[curidx*2];
yading@10 1642 vec2 = &p_codes[curidx2*2];
yading@10 1643
/*
 * Quantization error per coefficient. nmsub.s fd, fr, fs, ft yields
 * fr - fs * ft, so di0 = in[i] - vec[0] * IQ, di1 = in[i+1] - vec[1] * IQ,
 * di2 = in[i+2] - vec2[0] * IQ and di3 = in[i+3] - vec2[1] * IQ.
 */
yading@10 1644 __asm__ volatile (
yading@10 1645 ".set push \n\t"
yading@10 1646 ".set noreorder \n\t"
yading@10 1647
yading@10 1648 "lwc1 $f0, 0(%[in_pos]) \n\t"
yading@10 1649 "lwc1 $f1, 0(%[vec]) \n\t"
yading@10 1650 "lwc1 $f2, 4(%[in_pos]) \n\t"
yading@10 1651 "lwc1 $f3, 4(%[vec]) \n\t"
yading@10 1652 "lwc1 $f4, 8(%[in_pos]) \n\t"
yading@10 1653 "lwc1 $f5, 0(%[vec2]) \n\t"
yading@10 1654 "lwc1 $f6, 12(%[in_pos]) \n\t"
yading@10 1655 "lwc1 $f7, 4(%[vec2]) \n\t"
yading@10 1656 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
yading@10 1657 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
yading@10 1658 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
yading@10 1659 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
yading@10 1660
yading@10 1661 ".set pop \n\t"
yading@10 1662
yading@10 1663 : [di0]"=&f"(di0), [di1]"=&f"(di1),
yading@10 1664 [di2]"=&f"(di2), [di3]"=&f"(di3)
yading@10 1665 : [in_pos]"r"(in_pos), [vec]"r"(vec),
yading@10 1666 [vec2]"r"(vec2), [IQ]"f"(IQ)
yading@10 1667 : "$f0", "$f1", "$f2", "$f3",
yading@10 1668 "$f4", "$f5", "$f6", "$f7",
yading@10 1669 "memory"
yading@10 1670 );
yading@10 1671
/* Accumulate the squared error of the four coefficients. */
yading@10 1672 cost += di0 * di0 + di1 * di1
yading@10 1673 + di2 * di2 + di3 * di3;
yading@10 1674 }
yading@10 1675
yading@10 1676 if (bits)
yading@10 1677 *bits = curbits;
yading@10 1678 return cost * lambda + curbits;
yading@10 1679 }
yading@10 1680
/**
 * Rate/distortion cost of one band coded with an unsigned pair codebook
 * whose magnitudes are clamped to 7.
 *
 * Each loop iteration handles four coefficients (two codebook pairs) and
 * always touches elements i..i+3, so size is expected to be a multiple
 * of 4.
 *
 * @param s         not referenced in this function
 * @param pb        not referenced in this function
 * @param in        input coefficients; their absolute value is the
 *                  reference for the distortion
 * @param scaled    values multiplied by Q34 and truncated to get the
 *                  quantized magnitudes (presumably the abs_pow34_v()
 *                  output for in -- confirm against callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index, used to look up Q34 and IQ
 * @param cb        codebook number; the bit and vector tables are taken
 *                  at index cb-1
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     not referenced in this function
 * @param bits      if non-NULL, receives the accumulated bit count
 * @return cost * lambda + curbits
 */
yading@10 1681 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
yading@10 1682 PutBitContext *pb, const float *in,
yading@10 1683 const float *scaled, int size, int scale_idx,
yading@10 1684 int cb, const float lambda, const float uplim,
yading@10 1685 int *bits)
yading@10 1686 {
/* Q34 scales scaled[] before truncation; IQ scales codebook entries back. */
yading@10 1687 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1688 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
yading@10 1689 int i;
yading@10 1690 float cost = 0;
yading@10 1691 int qc1, qc2, qc3, qc4;
yading@10 1692 int curbits = 0;
yading@10 1693
yading@10 1694 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 1695 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
yading@10 1696
yading@10 1697 for (i = 0; i < size; i += 4) {
yading@10 1698 const float *vec, *vec2;
/* sign1/2 and count1/2 are written by the asm below but not read afterwards here. */
yading@10 1699 int curidx, curidx2, sign1, count1, sign2, count2;
yading@10 1700 int *in_int = (int *)&in[i];
yading@10 1701 float *in_pos = (float *)&in[i];
yading@10 1702 float di0, di1, di2, di3;
yading@10 1703
/* Scale, add the 0.4054 bias and convert to int (float-to-int truncation). */
yading@10 1704 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1705 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1706 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1707 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1708
/*
 * Step 1: clamp every qc to at most 7 (slt/movn against $t4 = 7).
 * Step 2: build sign1/sign2 per pair: starting from 0, for each
 *         coefficient whose qc is non-zero, shift the word left by one
 *         and OR in 1 when the raw input word is negative as an integer.
 * Step 3: count1/count2 = number of qc values greater than zero in the
 *         pair.
 */
yading@10 1709 __asm__ volatile (
yading@10 1710 ".set push \n\t"
yading@10 1711 ".set noreorder \n\t"
yading@10 1712
yading@10 1713 "ori $t4, $zero, 7 \n\t"
yading@10 1714 "ori %[sign1], $zero, 0 \n\t"
yading@10 1715 "ori %[sign2], $zero, 0 \n\t"
yading@10 1716 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1717 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1718 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1719 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1720 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1721 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1722 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1723 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1724 "lw $t0, 0(%[in_int]) \n\t"
yading@10 1725 "lw $t1, 4(%[in_int]) \n\t"
yading@10 1726 "lw $t2, 8(%[in_int]) \n\t"
yading@10 1727 "lw $t3, 12(%[in_int]) \n\t"
yading@10 1728 "slt $t0, $t0, $zero \n\t"
yading@10 1729 "movn %[sign1], $t0, %[qc1] \n\t"
yading@10 1730 "slt $t2, $t2, $zero \n\t"
yading@10 1731 "movn %[sign2], $t2, %[qc3] \n\t"
yading@10 1732 "slt $t1, $t1, $zero \n\t"
yading@10 1733 "sll $t0, %[sign1], 1 \n\t"
yading@10 1734 "or $t0, $t0, $t1 \n\t"
yading@10 1735 "movn %[sign1], $t0, %[qc2] \n\t"
yading@10 1736 "slt $t3, $t3, $zero \n\t"
yading@10 1737 "sll $t0, %[sign2], 1 \n\t"
yading@10 1738 "or $t0, $t0, $t3 \n\t"
yading@10 1739 "movn %[sign2], $t0, %[qc4] \n\t"
yading@10 1740 "slt %[count1], $zero, %[qc1] \n\t"
yading@10 1741 "slt $t1, $zero, %[qc2] \n\t"
yading@10 1742 "slt %[count2], $zero, %[qc3] \n\t"
yading@10 1743 "slt $t2, $zero, %[qc4] \n\t"
yading@10 1744 "addu %[count1], %[count1], $t1 \n\t"
yading@10 1745 "addu %[count2], %[count2], $t2 \n\t"
yading@10 1746
yading@10 1747 ".set pop \n\t"
yading@10 1748
yading@10 1749 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1750 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 1751 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
yading@10 1752 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
yading@10 1753 : [in_int]"r"(in_int)
yading@10 1754 : "t0", "t1", "t2", "t3", "t4",
yading@10 1755 "memory"
yading@10 1756 );
yading@10 1757
/* Pair index 8 * first + second: for qc values in 0..7 this is 0..63. */
yading@10 1758 curidx = 8 * qc1;
yading@10 1759 curidx += qc2;
yading@10 1760
yading@10 1761 curidx2 = 8 * qc3;
yading@10 1762 curidx2 += qc4;
yading@10 1763
/* Codeword length plus the per-entry value from upair7_sign_bits[]. */
yading@10 1764 curbits += p_bits[curidx];
yading@10 1765 curbits += upair7_sign_bits[curidx];
yading@10 1766 vec = &p_codes[curidx*2];
yading@10 1767
yading@10 1768 curbits += p_bits[curidx2];
yading@10 1769 curbits += upair7_sign_bits[curidx2];
yading@10 1770 vec2 = &p_codes[curidx2*2];
yading@10 1771
/*
 * Quantization error on magnitudes: load in[i..i+3], take abs.s, then
 * nmsub.s (fd = fr - fs * ft) gives di = |in| - codebook_value * IQ,
 * using vec[0], vec[1], vec2[0], vec2[1] in that order.
 */
yading@10 1772 __asm__ volatile (
yading@10 1773 ".set push \n\t"
yading@10 1774 ".set noreorder \n\t"
yading@10 1775
yading@10 1776 "lwc1 %[di0], 0(%[in_pos]) \n\t"
yading@10 1777 "lwc1 %[di1], 4(%[in_pos]) \n\t"
yading@10 1778 "lwc1 %[di2], 8(%[in_pos]) \n\t"
yading@10 1779 "lwc1 %[di3], 12(%[in_pos]) \n\t"
yading@10 1780 "abs.s %[di0], %[di0] \n\t"
yading@10 1781 "abs.s %[di1], %[di1] \n\t"
yading@10 1782 "abs.s %[di2], %[di2] \n\t"
yading@10 1783 "abs.s %[di3], %[di3] \n\t"
yading@10 1784 "lwc1 $f0, 0(%[vec]) \n\t"
yading@10 1785 "lwc1 $f1, 4(%[vec]) \n\t"
yading@10 1786 "lwc1 $f2, 0(%[vec2]) \n\t"
yading@10 1787 "lwc1 $f3, 4(%[vec2]) \n\t"
yading@10 1788 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
yading@10 1789 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
yading@10 1790 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
yading@10 1791 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
yading@10 1792
yading@10 1793 ".set pop \n\t"
yading@10 1794
yading@10 1795 : [di0]"=&f"(di0), [di1]"=&f"(di1),
yading@10 1796 [di2]"=&f"(di2), [di3]"=&f"(di3)
yading@10 1797 : [in_pos]"r"(in_pos), [vec]"r"(vec),
yading@10 1798 [vec2]"r"(vec2), [IQ]"f"(IQ)
yading@10 1799 : "$f0", "$f1", "$f2", "$f3",
yading@10 1800 "memory"
yading@10 1801 );
yading@10 1802
/* Accumulate the squared error of the four coefficients. */
yading@10 1803 cost += di0 * di0 + di1 * di1
yading@10 1804 + di2 * di2 + di3 * di3;
yading@10 1805 }
yading@10 1806
yading@10 1807 if (bits)
yading@10 1808 *bits = curbits;
yading@10 1809 return cost * lambda + curbits;
yading@10 1810 }
yading@10 1811
/**
 * Rate/distortion cost of one band coded with an unsigned pair codebook
 * whose magnitudes are clamped to 12.
 *
 * Each loop iteration handles four coefficients (two codebook pairs) and
 * always touches elements i..i+3, so size is expected to be a multiple
 * of 4.
 *
 * @param s         not referenced in this function
 * @param pb        not referenced in this function
 * @param in        input coefficients; their absolute value is the
 *                  reference for the distortion
 * @param scaled    values multiplied by Q34 and truncated to get the
 *                  quantized magnitudes (presumably the abs_pow34_v()
 *                  output for in -- confirm against callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index, used to look up Q34 and IQ
 * @param cb        codebook number; the bit and vector tables are taken
 *                  at index cb-1
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     not referenced in this function
 * @param bits      if non-NULL, receives the accumulated bit count
 * @return cost * lambda + curbits
 */
yading@10 1812 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
yading@10 1813 PutBitContext *pb, const float *in,
yading@10 1814 const float *scaled, int size, int scale_idx,
yading@10 1815 int cb, const float lambda, const float uplim,
yading@10 1816 int *bits)
yading@10 1817 {
/* Q34 scales scaled[] before truncation; IQ scales codebook entries back. */
yading@10 1818 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1819 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
yading@10 1820 int i;
yading@10 1821 float cost = 0;
yading@10 1822 int qc1, qc2, qc3, qc4;
yading@10 1823 int curbits = 0;
yading@10 1824
yading@10 1825 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
yading@10 1826 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
yading@10 1827
yading@10 1828 for (i = 0; i < size; i += 4) {
yading@10 1829 const float *vec, *vec2;
yading@10 1830 int curidx, curidx2;
/* sign1/2 and count1/2 are written by the asm below but not read afterwards here. */
yading@10 1831 int sign1, count1, sign2, count2;
yading@10 1832 int *in_int = (int *)&in[i];
yading@10 1833 float *in_pos = (float *)&in[i];
yading@10 1834 float di0, di1, di2, di3;
yading@10 1835
/* Scale, add the 0.4054 bias and convert to int (float-to-int truncation). */
yading@10 1836 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1837 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1838 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1839 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1840
/*
 * Step 1: clamp every qc to at most 12 (slt/movn against $t4 = 12).
 * Step 2: build sign1/sign2 per pair: starting from 0, for each
 *         coefficient whose qc is non-zero, shift the word left by one
 *         and OR in 1 when the raw input word is negative as an integer.
 * Step 3: count1/count2 = number of qc values greater than zero in the
 *         pair.
 */
yading@10 1841 __asm__ volatile (
yading@10 1842 ".set push \n\t"
yading@10 1843 ".set noreorder \n\t"
yading@10 1844
yading@10 1845 "ori $t4, $zero, 12 \n\t"
yading@10 1846 "ori %[sign1], $zero, 0 \n\t"
yading@10 1847 "ori %[sign2], $zero, 0 \n\t"
yading@10 1848 "slt $t0, $t4, %[qc1] \n\t"
yading@10 1849 "slt $t1, $t4, %[qc2] \n\t"
yading@10 1850 "slt $t2, $t4, %[qc3] \n\t"
yading@10 1851 "slt $t3, $t4, %[qc4] \n\t"
yading@10 1852 "movn %[qc1], $t4, $t0 \n\t"
yading@10 1853 "movn %[qc2], $t4, $t1 \n\t"
yading@10 1854 "movn %[qc3], $t4, $t2 \n\t"
yading@10 1855 "movn %[qc4], $t4, $t3 \n\t"
yading@10 1856 "lw $t0, 0(%[in_int]) \n\t"
yading@10 1857 "lw $t1, 4(%[in_int]) \n\t"
yading@10 1858 "lw $t2, 8(%[in_int]) \n\t"
yading@10 1859 "lw $t3, 12(%[in_int]) \n\t"
yading@10 1860 "slt $t0, $t0, $zero \n\t"
yading@10 1861 "movn %[sign1], $t0, %[qc1] \n\t"
yading@10 1862 "slt $t2, $t2, $zero \n\t"
yading@10 1863 "movn %[sign2], $t2, %[qc3] \n\t"
yading@10 1864 "slt $t1, $t1, $zero \n\t"
yading@10 1865 "sll $t0, %[sign1], 1 \n\t"
yading@10 1866 "or $t0, $t0, $t1 \n\t"
yading@10 1867 "movn %[sign1], $t0, %[qc2] \n\t"
yading@10 1868 "slt $t3, $t3, $zero \n\t"
yading@10 1869 "sll $t0, %[sign2], 1 \n\t"
yading@10 1870 "or $t0, $t0, $t3 \n\t"
yading@10 1871 "movn %[sign2], $t0, %[qc4] \n\t"
yading@10 1872 "slt %[count1], $zero, %[qc1] \n\t"
yading@10 1873 "slt $t1, $zero, %[qc2] \n\t"
yading@10 1874 "slt %[count2], $zero, %[qc3] \n\t"
yading@10 1875 "slt $t2, $zero, %[qc4] \n\t"
yading@10 1876 "addu %[count1], %[count1], $t1 \n\t"
yading@10 1877 "addu %[count2], %[count2], $t2 \n\t"
yading@10 1878
yading@10 1879 ".set pop \n\t"
yading@10 1880
yading@10 1881 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1882 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 1883 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
yading@10 1884 [sign2]"=&r"(sign2), [count2]"=&r"(count2)
yading@10 1885 : [in_int]"r"(in_int)
yading@10 1886 : "t0", "t1", "t2", "t3", "t4",
yading@10 1887 "memory"
yading@10 1888 );
yading@10 1889
/* Pair index 13 * first + second: for qc values in 0..12 this is 0..168. */
yading@10 1890 curidx = 13 * qc1;
yading@10 1891 curidx += qc2;
yading@10 1892
yading@10 1893 curidx2 = 13 * qc3;
yading@10 1894 curidx2 += qc4;
yading@10 1895
/* Codeword lengths plus the per-entry values from upair12_sign_bits[]. */
yading@10 1896 curbits += p_bits[curidx];
yading@10 1897 curbits += p_bits[curidx2];
yading@10 1898 curbits += upair12_sign_bits[curidx];
yading@10 1899 curbits += upair12_sign_bits[curidx2];
yading@10 1900 vec = &p_codes[curidx*2];
yading@10 1901 vec2 = &p_codes[curidx2*2];
yading@10 1902
/*
 * Quantization error on magnitudes: load in[i..i+3], take abs.s, then
 * nmsub.s (fd = fr - fs * ft) gives di = |in| - codebook_value * IQ,
 * using vec[0], vec[1], vec2[0], vec2[1] in that order.
 */
yading@10 1903 __asm__ volatile (
yading@10 1904 ".set push \n\t"
yading@10 1905 ".set noreorder \n\t"
yading@10 1906
yading@10 1907 "lwc1 %[di0], 0(%[in_pos]) \n\t"
yading@10 1908 "lwc1 %[di1], 4(%[in_pos]) \n\t"
yading@10 1909 "lwc1 %[di2], 8(%[in_pos]) \n\t"
yading@10 1910 "lwc1 %[di3], 12(%[in_pos]) \n\t"
yading@10 1911 "abs.s %[di0], %[di0] \n\t"
yading@10 1912 "abs.s %[di1], %[di1] \n\t"
yading@10 1913 "abs.s %[di2], %[di2] \n\t"
yading@10 1914 "abs.s %[di3], %[di3] \n\t"
yading@10 1915 "lwc1 $f0, 0(%[vec]) \n\t"
yading@10 1916 "lwc1 $f1, 4(%[vec]) \n\t"
yading@10 1917 "lwc1 $f2, 0(%[vec2]) \n\t"
yading@10 1918 "lwc1 $f3, 4(%[vec2]) \n\t"
yading@10 1919 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
yading@10 1920 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
yading@10 1921 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
yading@10 1922 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
yading@10 1923
yading@10 1924 ".set pop \n\t"
yading@10 1925
yading@10 1926 : [di0]"=&f"(di0), [di1]"=&f"(di1),
yading@10 1927 [di2]"=&f"(di2), [di3]"=&f"(di3)
yading@10 1928 : [in_pos]"r"(in_pos), [vec]"r"(vec),
yading@10 1929 [vec2]"r"(vec2), [IQ]"f"(IQ)
yading@10 1930 : "$f0", "$f1", "$f2", "$f3",
yading@10 1931 "memory"
yading@10 1932 );
yading@10 1933
/* Accumulate the squared error of the four coefficients. */
yading@10 1934 cost += di0 * di0 + di1 * di1
yading@10 1935 + di2 * di2 + di3 * di3;
yading@10 1936 }
yading@10 1937
yading@10 1938 if (bits)
yading@10 1939 *bits = curbits;
yading@10 1940 return cost * lambda + curbits;
yading@10 1941 }
yading@10 1942
/**
 * Rate/distortion cost of one band coded with the escape codebook.
 *
 * Quantized magnitudes above 15 are coded as index 16 plus an escape
 * sequence whose length is derived from av_log2() of the (limited)
 * magnitude. Each loop iteration handles four coefficients (two codebook
 * pairs) and always touches elements i..i+3, so size is expected to be a
 * multiple of 4.
 *
 * @param s         not referenced in this function
 * @param pb        not referenced in this function
 * @param in        input coefficients; their absolute value is the
 *                  reference for the distortion
 * @param scaled    values multiplied by Q34 and truncated to get the
 *                  quantized magnitudes (presumably the abs_pow34_v()
 *                  output for in -- confirm against callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index, used to look up Q34 and IQ
 * @param cb        codebook number; the bit and vector tables are taken
 *                  at index cb-1
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     not referenced in this function
 * @param bits      if non-NULL, receives the accumulated bit count
 * @return cost * lambda + curbits
 */
yading@10 1943 static float get_band_cost_ESC_mips(struct AACEncContext *s,
yading@10 1944 PutBitContext *pb, const float *in,
yading@10 1945 const float *scaled, int size, int scale_idx,
yading@10 1946 int cb, const float lambda, const float uplim,
yading@10 1947 int *bits)
yading@10 1948 {
/* Q34 scales scaled[] before truncation; IQ scales reconstructed values back. */
yading@10 1949 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
yading@10 1950 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
/* Magnitude at or above which an escaped input is measured against this fixed value. */
yading@10 1951 const float CLIPPED_ESCAPE = 165140.0f * IQ;
yading@10 1952 int i;
yading@10 1953 float cost = 0;
yading@10 1954 int qc1, qc2, qc3, qc4;
yading@10 1955 int curbits = 0;
yading@10 1956
yading@10 1957 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
yading@10 1958 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
yading@10 1959
yading@10 1960 for (i = 0; i < size; i += 4) {
yading@10 1961 const float *vec, *vec2;
yading@10 1962 int curidx, curidx2;
yading@10 1963 float t1, t2, t3, t4;
yading@10 1964 float di1, di2, di3, di4;
yading@10 1965 int cond0, cond1, cond2, cond3;
yading@10 1966 int c1, c2, c3, c4;
yading@10 1967
/* Scale, add the 0.4054 bias and convert to int (float-to-int truncation). */
yading@10 1968 qc1 = scaled[i ] * Q34 + 0.4054f;
yading@10 1969 qc2 = scaled[i+1] * Q34 + 0.4054f;
yading@10 1970 qc3 = scaled[i+2] * Q34 + 0.4054f;
yading@10 1971 qc4 = scaled[i+3] * Q34 + 0.4054f;
yading@10 1972
/*
 * c1..c4: qc shifted left by 18 with shll_s.w and logically back right
 *         by 18, i.e. qc limited to 13 bits (presumably saturating at
 *         8191 for large qc, shll_s.w being the DSP-ASE saturating
 *         shift -- confirm against the DSP ASE manual).
 * cond0..cond3: 1 when qc exceeds 15, else 0.
 * qc1..qc4: replaced by 16 where the matching cond is set.
 */
yading@10 1973 __asm__ volatile (
yading@10 1974 ".set push \n\t"
yading@10 1975 ".set noreorder \n\t"
yading@10 1976
yading@10 1977 "ori $t4, $zero, 15 \n\t"
yading@10 1978 "ori $t5, $zero, 16 \n\t"
yading@10 1979 "shll_s.w %[c1], %[qc1], 18 \n\t"
yading@10 1980 "shll_s.w %[c2], %[qc2], 18 \n\t"
yading@10 1981 "shll_s.w %[c3], %[qc3], 18 \n\t"
yading@10 1982 "shll_s.w %[c4], %[qc4], 18 \n\t"
yading@10 1983 "srl %[c1], %[c1], 18 \n\t"
yading@10 1984 "srl %[c2], %[c2], 18 \n\t"
yading@10 1985 "srl %[c3], %[c3], 18 \n\t"
yading@10 1986 "srl %[c4], %[c4], 18 \n\t"
yading@10 1987 "slt %[cond0], $t4, %[qc1] \n\t"
yading@10 1988 "slt %[cond1], $t4, %[qc2] \n\t"
yading@10 1989 "slt %[cond2], $t4, %[qc3] \n\t"
yading@10 1990 "slt %[cond3], $t4, %[qc4] \n\t"
yading@10 1991 "movn %[qc1], $t5, %[cond0] \n\t"
yading@10 1992 "movn %[qc2], $t5, %[cond1] \n\t"
yading@10 1993 "movn %[qc3], $t5, %[cond2] \n\t"
yading@10 1994 "movn %[qc4], $t5, %[cond3] \n\t"
yading@10 1995
yading@10 1996 ".set pop \n\t"
yading@10 1997
yading@10 1998 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
yading@10 1999 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
yading@10 2000 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
yading@10 2001 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
yading@10 2002 [c1]"=&r"(c1), [c2]"=&r"(c2),
yading@10 2003 [c3]"=&r"(c3), [c4]"=&r"(c4)
yading@10 2004 :
yading@10 2005 : "t4", "t5"
yading@10 2006 );
yading@10 2007
/* Pair index 17 * first + second: for qc values in 0..16 this is 0..288. */
yading@10 2008 curidx = 17 * qc1;
yading@10 2009 curidx += qc2;
yading@10 2010
yading@10 2011 curidx2 = 17 * qc3;
yading@10 2012 curidx2 += qc4;
yading@10 2013
/* Codeword length plus the per-entry value from esc_sign_bits[]. */
yading@10 2014 curbits += p_bits[curidx];
yading@10 2015 curbits += esc_sign_bits[curidx];
yading@10 2016 vec = &p_codes[curidx*2];
yading@10 2017
yading@10 2018 curbits += p_bits[curidx2];
yading@10 2019 curbits += esc_sign_bits[curidx2];
yading@10 2020 vec2 = &p_codes[curidx2*2];
yading@10 2021
/*
 * Escape sequence length, added only for escaped coefficients:
 * -cond is all ones when cond == 1 and zero when cond == 0, so the
 * AND works as a branch-free select.
 */
yading@10 2022 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
yading@10 2023 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
yading@10 2024 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
yading@10 2025 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
yading@10 2026
yading@10 2027 t1 = fabsf(in[i ]);
yading@10 2028 t2 = fabsf(in[i+1]);
yading@10 2029 t3 = fabsf(in[i+2]);
yading@10 2030 t4 = fabsf(in[i+3]);
yading@10 2031
/*
 * Error per coefficient. Escaped: against CLIPPED_ESCAPE when the input
 * magnitude reaches it, otherwise against c * cbrtf(c) * IQ (c^(4/3)
 * scaled by IQ). Not escaped: against the codebook vector entry * IQ.
 */
yading@10 2032 if (cond0) {
yading@10 2033 if (t1 >= CLIPPED_ESCAPE) {
yading@10 2034 di1 = t1 - CLIPPED_ESCAPE;
yading@10 2035 } else {
yading@10 2036 di1 = t1 - c1 * cbrtf(c1) * IQ;
yading@10 2037 }
yading@10 2038 } else
yading@10 2039 di1 = t1 - vec[0] * IQ;
yading@10 2040
yading@10 2041 if (cond1) {
yading@10 2042 if (t2 >= CLIPPED_ESCAPE) {
yading@10 2043 di2 = t2 - CLIPPED_ESCAPE;
yading@10 2044 } else {
yading@10 2045 di2 = t2 - c2 * cbrtf(c2) * IQ;
yading@10 2046 }
yading@10 2047 } else
yading@10 2048 di2 = t2 - vec[1] * IQ;
yading@10 2049
yading@10 2050 if (cond2) {
yading@10 2051 if (t3 >= CLIPPED_ESCAPE) {
yading@10 2052 di3 = t3 - CLIPPED_ESCAPE;
yading@10 2053 } else {
yading@10 2054 di3 = t3 - c3 * cbrtf(c3) * IQ;
yading@10 2055 }
yading@10 2056 } else
yading@10 2057 di3 = t3 - vec2[0] * IQ;
yading@10 2058
yading@10 2059 if (cond3) {
yading@10 2060 if (t4 >= CLIPPED_ESCAPE) {
yading@10 2061 di4 = t4 - CLIPPED_ESCAPE;
yading@10 2062 } else {
yading@10 2063 di4 = t4 - c4 * cbrtf(c4) * IQ;
yading@10 2064 }
yading@10 2065 } else
yading@10 2066 di4 = t4 - vec2[1]*IQ;
yading@10 2067
/* Accumulate the squared error of the four coefficients. */
yading@10 2068 cost += di1 * di1 + di2 * di2
yading@10 2069 + di3 * di3 + di4 * di4;
yading@10 2070 }
yading@10 2071
yading@10 2072 if (bits)
yading@10 2073 *bits = curbits;
yading@10 2074 return cost * lambda + curbits;
yading@10 2075 }
yading@10 2076
yading@10 2077 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
yading@10 2078 PutBitContext *pb, const float *in,
yading@10 2079 const float *scaled, int size, int scale_idx,
yading@10 2080 int cb, const float lambda, const float uplim,
yading@10 2081 int *bits) = {
yading@10 2082 get_band_cost_ZERO_mips,
yading@10 2083 get_band_cost_SQUAD_mips,
yading@10 2084 get_band_cost_SQUAD_mips,
yading@10 2085 get_band_cost_UQUAD_mips,
yading@10 2086 get_band_cost_UQUAD_mips,
yading@10 2087 get_band_cost_SPAIR_mips,
yading@10 2088 get_band_cost_SPAIR_mips,
yading@10 2089 get_band_cost_UPAIR7_mips,
yading@10 2090 get_band_cost_UPAIR7_mips,
yading@10 2091 get_band_cost_UPAIR12_mips,
yading@10 2092 get_band_cost_UPAIR12_mips,
yading@10 2093 get_band_cost_ESC_mips,
yading@10 2094 };
yading@10 2095
/*
 * Call the band cost routine registered for codebook cb, forwarding all
 * arguments unchanged. cb appears twice in the expansion (table index and
 * argument), so it must be free of side effects.
 */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits) \
    get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits)
yading@10 2102
yading@10 2103 static float quantize_band_cost(struct AACEncContext *s, const float *in,
yading@10 2104 const float *scaled, int size, int scale_idx,
yading@10 2105 int cb, const float lambda, const float uplim,
yading@10 2106 int *bits)
yading@10 2107 {
yading@10 2108 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
yading@10 2109 }
yading@10 2110
/**
 * Two-loop scalefactor search for one channel.
 *
 * Outline of what the code does:
 *  1. Per window group and band, sum the psychoacoustic thresholds, mark
 *     bands whose energy does not exceed the threshold (or whose threshold
 *     is 0) as zero, and remember the smallest summed threshold of the
 *     non-zero bands.
 *  2. Seed sce->sf_idx[] from the ratio of each band's limit to that
 *     minimum.
 *  3. Repeat up to 10 times while any scalefactor still changes:
 *     - inner loop: move all scalefactors up or down by qstep (32, 16, ...
 *       1 on the first pass, 1 afterwards) depending on whether the bit
 *       total exceeds destbits;
 *     - afterwards: lower the scalefactor of bands whose distortion exceeds
 *       their limit, clip to the allowed range around the minimum
 *       scalefactor and pick the codebook with find_min_book().
 *
 * @param avctx  supplies bit_rate, sample_rate and channels for the bit budget
 * @param s      encoder context (psy bands, scoefs scratch buffer)
 * @param sce    channel whose zeroes[], sf_idx[] and band_type[] are written
 * @param lambda not referenced in this function
 */
yading@10 2111 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
yading@10 2112 AACEncContext *s,
yading@10 2113 SingleChannelElement *sce,
yading@10 2114 const float lambda)
yading@10 2115 {
yading@10 2116 int start = 0, i, w, w2, g;
/* Bit budget derived from the bitrate; capped at 5800 below. */
yading@10 2117 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
yading@10 2118 float dists[128] = { 0 }, uplims[128];
yading@10 2119 float maxvals[128];
yading@10 2120 int fflag, minscaler;
yading@10 2121 int its = 0;
/* Despite its name, allz becomes non-zero as soon as any band is non-zero. */
yading@10 2122 int allz = 0;
yading@10 2123 float minthr = INFINITY;
yading@10 2124
yading@10 2125 destbits = FFMIN(destbits, 5800);
/* Pass 1: per-band limits (summed thresholds * 512) and zero flags. */
yading@10 2126 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
yading@10 2127 for (g = 0; g < sce->ics.num_swb; g++) {
yading@10 2128 int nz = 0;
yading@10 2129 float uplim = 0.0f;
yading@10 2130 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
yading@10 2131 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
yading@10 2132 uplim += band->threshold;
yading@10 2133 if (band->energy <= band->threshold || band->threshold == 0.0f) {
yading@10 2134 sce->zeroes[(w+w2)*16+g] = 1;
yading@10 2135 continue;
yading@10 2136 }
yading@10 2137 nz = 1;
yading@10 2138 }
yading@10 2139 uplims[w*16+g] = uplim *512;
/* The group-level flag is zero only if every window of the group was zeroed. */
yading@10 2140 sce->zeroes[w*16+g] = !nz;
yading@10 2141 if (nz)
yading@10 2142 minthr = FFMIN(minthr, uplim);
yading@10 2143 allz |= nz;
yading@10 2144 }
yading@10 2145 }
/* Pass 2: initial scalefactors; zero bands get SCALE_ONE_POS. */
yading@10 2146 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
yading@10 2147 for (g = 0; g < sce->ics.num_swb; g++) {
yading@10 2148 if (sce->zeroes[w*16+g]) {
yading@10 2149 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
yading@10 2150 continue;
yading@10 2151 }
yading@10 2152 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
yading@10 2153 }
yading@10 2154 }
yading@10 2155
/* Nothing to search when every band is zero. */
yading@10 2156 if (!allz)
yading@10 2157 return;
yading@10 2158 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
yading@10 2159
/* Per-band maximum of the scaled coefficients, used by find_min_book(). */
yading@10 2160 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
yading@10 2161 start = w*128;
yading@10 2162 for (g = 0; g < sce->ics.num_swb; g++) {
yading@10 2163 const float *scaled = s->scoefs + start;
yading@10 2164 maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
yading@10 2165 start += sce->ics.swb_sizes[g];
yading@10 2166 }
yading@10 2167 }
yading@10 2168
yading@10 2169 do {
yading@10 2170 int tbits, qstep;
yading@10 2171 minscaler = sce->sf_idx[0];
/* Coarse-to-fine step on the first outer iteration, single steps afterwards. */
yading@10 2172 qstep = its ? 1 : 32;
yading@10 2173 do {
yading@10 2174 int prev = -1;
yading@10 2175 tbits = 0;
yading@10 2176 fflag = 0;
yading@10 2177
/*
 * While qstep > 1 only the bit count is needed, so the bits-only helper
 * quantize_band_cost_bits() is called and dists[] is left untouched.
 * The final inner iteration always runs with qstep == 1 and takes the
 * else branch, which also records the distortion per band.
 */
yading@10 2178 if (qstep > 1) {
yading@10 2179 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
yading@10 2180 start = w*128;
yading@10 2181 for (g = 0; g < sce->ics.num_swb; g++) {
yading@10 2182 const float *coefs = sce->coeffs + start;
yading@10 2183 const float *scaled = s->scoefs + start;
yading@10 2184 int bits = 0;
yading@10 2185 int cb;
yading@10 2186
yading@10 2187 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
yading@10 2188 start += sce->ics.swb_sizes[g];
yading@10 2189 continue;
yading@10 2190 }
yading@10 2191 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
yading@10 2192 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
yading@10 2193 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
yading@10 2194 int b;
yading@10 2195 bits += quantize_band_cost_bits(s, coefs + w2*128,
yading@10 2196 scaled + w2*128,
yading@10 2197 sce->ics.swb_sizes[g],
yading@10 2198 sce->sf_idx[w*16+g],
yading@10 2199 cb,
yading@10 2200 1.0f,
yading@10 2201 INFINITY,
yading@10 2202 &b);
yading@10 2203 }
/* Add the cost of coding the scalefactor difference to the previous coded band. */
yading@10 2204 if (prev != -1) {
yading@10 2205 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
yading@10 2206 }
yading@10 2207 tbits += bits;
yading@10 2208 start += sce->ics.swb_sizes[g];
yading@10 2209 prev = sce->sf_idx[w*16+g];
yading@10 2210 }
yading@10 2211 }
yading@10 2212 }
yading@10 2213 else {
yading@10 2214 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
yading@10 2215 start = w*128;
yading@10 2216 for (g = 0; g < sce->ics.num_swb; g++) {
yading@10 2217 const float *coefs = sce->coeffs + start;
yading@10 2218 const float *scaled = s->scoefs + start;
yading@10 2219 int bits = 0;
yading@10 2220 int cb;
yading@10 2221 float dist = 0.0f;
yading@10 2222
yading@10 2223 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
yading@10 2224 start += sce->ics.swb_sizes[g];
yading@10 2225 continue;
yading@10 2226 }
yading@10 2227 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
yading@10 2228 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
yading@10 2229 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
yading@10 2230 int b;
yading@10 2231 dist += quantize_band_cost(s, coefs + w2*128,
yading@10 2232 scaled + w2*128,
yading@10 2233 sce->ics.swb_sizes[g],
yading@10 2234 sce->sf_idx[w*16+g],
yading@10 2235 cb,
yading@10 2236 1.0f,
yading@10 2237 INFINITY,
yading@10 2238 &b);
yading@10 2239 bits += b;
yading@10 2240 }
/* With lambda == 1 the returned cost is distortion + bits, so subtract the bits. */
yading@10 2241 dists[w*16+g] = dist - bits;
yading@10 2242 if (prev != -1) {
yading@10 2243 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
yading@10 2244 }
yading@10 2245 tbits += bits;
yading@10 2246 start += sce->ics.swb_sizes[g];
yading@10 2247 prev = sce->sf_idx[w*16+g];
yading@10 2248 }
yading@10 2249 }
yading@10 2250 }
/* Over budget: raise all scalefactors by qstep; otherwise lower them. */
yading@10 2251 if (tbits > destbits) {
yading@10 2252 for (i = 0; i < 128; i++)
yading@10 2253 if (sce->sf_idx[i] < 218 - qstep)
yading@10 2254 sce->sf_idx[i] += qstep;
yading@10 2255 } else {
yading@10 2256 for (i = 0; i < 128; i++)
yading@10 2257 if (sce->sf_idx[i] > 60 - qstep)
yading@10 2258 sce->sf_idx[i] -= qstep;
yading@10 2259 }
yading@10 2260 qstep >>= 1;
/* Keep single-stepping while still more than 2% over budget. */
yading@10 2261 if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
yading@10 2262 qstep = 1;
yading@10 2263 } while (qstep);
yading@10 2264
yading@10 2265 fflag = 0;
yading@10 2266 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
/* Per-band refinement: reduce the scalefactor where distortion exceeds the limit. */
yading@10 2267 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
yading@10 2268 for (g = 0; g < sce->ics.num_swb; g++) {
yading@10 2269 int prevsc = sce->sf_idx[w*16+g];
yading@10 2270 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
/* Step down by 1 if find_min_book() returns a non-zero codebook there, else by 2. */
yading@10 2271 if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
yading@10 2272 sce->sf_idx[w*16+g]--;
yading@10 2273 else
yading@10 2274 sce->sf_idx[w*16+g]-=2;
yading@10 2275 }
yading@10 2276 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
yading@10 2277 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
/* Any change requests another outer iteration (bounded by its < 10). */
yading@10 2278 if (sce->sf_idx[w*16+g] != prevsc)
yading@10 2279 fflag = 1;
yading@10 2280 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
yading@10 2281 }
yading@10 2282 }
yading@10 2283 its++;
yading@10 2284 } while (fflag && its < 10);
yading@10 2285 }
yading@10 2286
yading@10 2287 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe,
yading@10 2288 const float lambda)
yading@10 2289 {
yading@10 2290 int start = 0, i, w, w2, g;
yading@10 2291 float M[128], S[128];
yading@10 2292 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
yading@10 2293 SingleChannelElement *sce0 = &cpe->ch[0];
yading@10 2294 SingleChannelElement *sce1 = &cpe->ch[1];
yading@10 2295 if (!cpe->common_window)
yading@10 2296 return;
yading@10 2297 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
yading@10 2298 for (g = 0; g < sce0->ics.num_swb; g++) {
yading@10 2299 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
yading@10 2300 float dist1 = 0.0f, dist2 = 0.0f;
yading@10 2301 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
yading@10 2302 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
yading@10 2303 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
yading@10 2304 float minthr = FFMIN(band0->threshold, band1->threshold);
yading@10 2305 float maxthr = FFMAX(band0->threshold, band1->threshold);
yading@10 2306 for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
yading@10 2307 M[i ] = (sce0->coeffs[start+w2*128+i ]
yading@10 2308 + sce1->coeffs[start+w2*128+i ]) * 0.5;
yading@10 2309 M[i+1] = (sce0->coeffs[start+w2*128+i+1]
yading@10 2310 + sce1->coeffs[start+w2*128+i+1]) * 0.5;
yading@10 2311 M[i+2] = (sce0->coeffs[start+w2*128+i+2]
yading@10 2312 + sce1->coeffs[start+w2*128+i+2]) * 0.5;
yading@10 2313 M[i+3] = (sce0->coeffs[start+w2*128+i+3]
yading@10 2314 + sce1->coeffs[start+w2*128+i+3]) * 0.5;
yading@10 2315
yading@10 2316 S[i ] = M[i ]
yading@10 2317 - sce1->coeffs[start+w2*128+i ];
yading@10 2318 S[i+1] = M[i+1]
yading@10 2319 - sce1->coeffs[start+w2*128+i+1];
yading@10 2320 S[i+2] = M[i+2]
yading@10 2321 - sce1->coeffs[start+w2*128+i+2];
yading@10 2322 S[i+3] = M[i+3]
yading@10 2323 - sce1->coeffs[start+w2*128+i+3];
yading@10 2324 }
yading@10 2325 abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
yading@10 2326 abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
yading@10 2327 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
yading@10 2328 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
yading@10 2329 dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
yading@10 2330 L34,
yading@10 2331 sce0->ics.swb_sizes[g],
yading@10 2332 sce0->sf_idx[(w+w2)*16+g],
yading@10 2333 sce0->band_type[(w+w2)*16+g],
yading@10 2334 lambda / band0->threshold, INFINITY, NULL);
yading@10 2335 dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
yading@10 2336 R34,
yading@10 2337 sce1->ics.swb_sizes[g],
yading@10 2338 sce1->sf_idx[(w+w2)*16+g],
yading@10 2339 sce1->band_type[(w+w2)*16+g],
yading@10 2340 lambda / band1->threshold, INFINITY, NULL);
yading@10 2341 dist2 += quantize_band_cost(s, M,
yading@10 2342 M34,
yading@10 2343 sce0->ics.swb_sizes[g],
yading@10 2344 sce0->sf_idx[(w+w2)*16+g],
yading@10 2345 sce0->band_type[(w+w2)*16+g],
yading@10 2346 lambda / maxthr, INFINITY, NULL);
yading@10 2347 dist2 += quantize_band_cost(s, S,
yading@10 2348 S34,
yading@10 2349 sce1->ics.swb_sizes[g],
yading@10 2350 sce1->sf_idx[(w+w2)*16+g],
yading@10 2351 sce1->band_type[(w+w2)*16+g],
yading@10 2352 lambda / minthr, INFINITY, NULL);
yading@10 2353 }
yading@10 2354 cpe->ms_mask[w*16+g] = dist2 < dist1;
yading@10 2355 }
yading@10 2356 start += sce0->ics.swb_sizes[g];
yading@10 2357 }
yading@10 2358 }
yading@10 2359 }
yading@10 2360 #endif /*HAVE_MIPSFPU */
yading@10 2361
/**
 * Choose a codebook for every scalefactor band of one window group with a
 * trellis (dynamic-programming) search over bit cost, then write the
 * resulting (codebook, run length) pairs to the bitstream.
 *
 * @param s         encoder context; s->scoefs is overwritten as scratch and
 *                  the result is written to s->pb
 * @param sce       channel element; its coeffs, sf_idx, zeroes and band_type
 *                  are read, and zeroes/band_type are rewritten for window
 *                  `win` with the chosen codebooks
 * @param win       index of the first window of the group
 * @param group_len number of windows whose bit costs are summed per band
 * @param lambda    not referenced in this function
 *
 * NOTE(review): the (4-bit codebook, run_bits-wide run length with escape)
 * output presumably corresponds to AAC section data -- confirm against the
 * bitstream specification.
 */
yading@10 2362 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
yading@10 2363 int win, int group_len, const float lambda)
yading@10 2364 {
/* path[pos][cb]: cheapest known way to have coded bands [0, pos) with band
 * pos-1 using codebook cb.
 * NOTE(review): rows up to path[max_sfb] are accessed, so this assumes
 * max_sfb < 120; nothing here checks it. */
yading@10 2365 BandCodingPath path[120][12];
yading@10 2366 int w, swb, cb, start, size;
yading@10 2367 int i, j;
yading@10 2368 const int max_sfb = sce->ics.max_sfb;
/* Width of one run-length field: 5 bits with a single window, 3 otherwise. */
yading@10 2369 const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
/* All-ones run value; the writer below emits it repeatedly for long runs. */
yading@10 2370 const int run_esc = (1 << run_bits) - 1;
yading@10 2371 int idx, ppos, count;
yading@10 2372 int stackrun[120], stackcb[120], stack_len;
/* Cheapest cost (and its codebook) over all codebooks at the position most
 * recently filled in; INFINITY until the first band has been processed. */
yading@10 2373 float next_minbits = INFINITY;
yading@10 2374 int next_mincb = 0;
yading@10 2375
/* Fill s->scoefs from all 1024 coefficients; abs_pow34_v is defined
 * elsewhere (presumably |x|^(3/4), going by its name -- confirm). */
yading@10 2376 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
/* Coefficient offset of window `win` (windows are 128 coefficients apart). */
yading@10 2377 start = win*128;
/* Position 0: every codebook starts at the cost of opening one section
 * (run_bits plus 4; 4 is the width used for the codebook index below). */
yading@10 2378 for (cb = 0; cb < 12; cb++) {
yading@10 2379 path[0][cb].cost = run_bits+4;
yading@10 2380 path[0][cb].prev_idx = -1;
yading@10 2381 path[0][cb].run = 0;
yading@10 2382 }
/* Forward pass: extend the trellis by one band per iteration. */
yading@10 2383 for (swb = 0; swb < max_sfb; swb++) {
yading@10 2384 size = sce->ics.swb_sizes[swb];
yading@10 2385 if (sce->zeroes[win*16 + swb]) {
/* Band flagged as zero: only codebook 0 is considered and no coefficient
 * bits are added. Either extend the current codebook-0 run or open a new
 * run coming from the cheapest previous state. */
yading@10 2386 float cost_stay_here = path[swb][0].cost;
yading@10 2387 float cost_get_here = next_minbits + run_bits + 4;
/* Staying costs an extra run field when the table entry for run+1 differs
 * from the one for run (run_value_bits is defined elsewhere; presumably
 * the bits needed to code a run of that length). */
yading@10 2388 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
yading@10 2389 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
yading@10 2390 cost_stay_here += run_bits;
yading@10 2391 if (cost_get_here < cost_stay_here) {
yading@10 2392 path[swb+1][0].prev_idx = next_mincb;
yading@10 2393 path[swb+1][0].cost = cost_get_here;
yading@10 2394 path[swb+1][0].run = 1;
yading@10 2395 } else {
yading@10 2396 path[swb+1][0].prev_idx = 0;
yading@10 2397 path[swb+1][0].cost = cost_stay_here;
yading@10 2398 path[swb+1][0].run = path[swb][0].run + 1;
yading@10 2399 }
yading@10 2400 next_minbits = path[swb+1][0].cost;
yading@10 2401 next_mincb = 0;
/* Every other codebook gets a fixed large cost so it is not selected. */
yading@10 2402 for (cb = 1; cb < 12; cb++) {
yading@10 2403 path[swb+1][cb].cost = 61450;
yading@10 2404 path[swb+1][cb].prev_idx = -1;
yading@10 2405 path[swb+1][cb].run = 0;
yading@10 2406 }
yading@10 2407 } else {
/* Snapshot the previous position's minimum before next_minbits/next_mincb
 * are reset and rebuilt for this position. */
yading@10 2408 float minbits = next_minbits;
yading@10 2409 int mincb = next_mincb;
/* Codebooks below the band's current band_type are not evaluated. */
yading@10 2410 int startcb = sce->band_type[win*16+swb];
yading@10 2411 next_minbits = INFINITY;
yading@10 2412 next_mincb = 0;
yading@10 2413 for (cb = 0; cb < startcb; cb++) {
yading@10 2414 path[swb+1][cb].cost = 61450;
yading@10 2415 path[swb+1][cb].prev_idx = -1;
yading@10 2416 path[swb+1][cb].run = 0;
yading@10 2417 }
yading@10 2418 for (cb = startcb; cb < 12; cb++) {
yading@10 2419 float cost_stay_here, cost_get_here;
yading@10 2420 float bits = 0.0f;
/* Bit cost of this band with codebook cb, summed over the group's windows. */
yading@10 2421 for (w = 0; w < group_len; w++) {
yading@10 2422 bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
yading@10 2423 s->scoefs + start + w*128, size,
yading@10 2424 sce->sf_idx[(win+w)*16+swb], cb,
yading@10 2425 0, INFINITY, NULL);
yading@10 2426 }
/* Stay: continue the run of cb. Get here: switch from the cheapest
 * previous codebook and pay for a new section header. */
yading@10 2427 cost_stay_here = path[swb][cb].cost + bits;
yading@10 2428 cost_get_here = minbits + bits + run_bits + 4;
yading@10 2429 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
yading@10 2430 != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
yading@10 2431 cost_stay_here += run_bits;
yading@10 2432 if (cost_get_here < cost_stay_here) {
yading@10 2433 path[swb+1][cb].prev_idx = mincb;
yading@10 2434 path[swb+1][cb].cost = cost_get_here;
yading@10 2435 path[swb+1][cb].run = 1;
yading@10 2436 } else {
yading@10 2437 path[swb+1][cb].prev_idx = cb;
yading@10 2438 path[swb+1][cb].cost = cost_stay_here;
yading@10 2439 path[swb+1][cb].run = path[swb][cb].run + 1;
yading@10 2440 }
/* Track the cheapest state at this position for the next band. */
yading@10 2441 if (path[swb+1][cb].cost < next_minbits) {
yading@10 2442 next_minbits = path[swb+1][cb].cost;
yading@10 2443 next_mincb = cb;
yading@10 2444 }
yading@10 2445 }
yading@10 2446 }
yading@10 2447 start += sce->ics.swb_sizes[swb];
yading@10 2448 }
yading@10 2449
/* Backward pass: start from the cheapest final state and walk the path back
 * one run at a time, pushing (run, codebook) pairs onto a stack. */
yading@10 2450 stack_len = 0;
yading@10 2451 idx = 0;
yading@10 2452 for (cb = 1; cb < 12; cb++)
yading@10 2453 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
yading@10 2454 idx = cb;
yading@10 2455 ppos = max_sfb;
yading@10 2456 while (ppos > 0) {
yading@10 2457 av_assert1(idx >= 0);
yading@10 2458 cb = idx;
yading@10 2459 stackrun[stack_len] = path[ppos][cb].run;
yading@10 2460 stackcb [stack_len] = cb;
/* The predecessor codebook is stored at the first band of this run. */
yading@10 2461 idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
yading@10 2462 ppos -= path[ppos][cb].run;
yading@10 2463 stack_len++;
yading@10 2464 }
yading@10 2465
/* Emit the runs in forward order; `start` is now a band index, not a
 * coefficient offset. */
yading@10 2466 start = 0;
yading@10 2467 for (i = stack_len - 1; i >= 0; i--) {
yading@10 2468 put_bits(&s->pb, 4, stackcb[i]);
yading@10 2469 count = stackrun[i];
/* Bands coded with codebook 0 are flagged as zero, all others as non-zero. */
yading@10 2470 memset(sce->zeroes + win*16 + start, !stackcb[i], count);
yading@10 2471 for (j = 0; j < count; j++) {
yading@10 2472 sce->band_type[win*16 + start] = stackcb[i];
yading@10 2473 start++;
yading@10 2474 }
/* Run length: one run_esc field per full run_esc bands, then the
 * remainder (which may be 0). */
yading@10 2475 while (count >= run_esc) {
yading@10 2476 put_bits(&s->pb, run_bits, run_esc);
yading@10 2477 count -= run_esc;
yading@10 2478 }
yading@10 2479 put_bits(&s->pb, run_bits, count);
yading@10 2480 }
yading@10 2481 }
yading@10 2482 #endif /* HAVE_INLINE_ASM */
yading@10 2483
yading@10 2484 void ff_aac_coder_init_mips(AACEncContext *c) {
yading@10 2485 #if HAVE_INLINE_ASM
yading@10 2486 AACCoefficientsEncoder *e = c->coder;
yading@10 2487 int option = c->options.aac_coder;
yading@10 2488
yading@10 2489 if (option == 2) {
yading@10 2490 e->quantize_and_encode_band = quantize_and_encode_band_mips;
yading@10 2491 e->encode_window_bands_info = codebook_trellis_rate_mips;
yading@10 2492 #if HAVE_MIPSFPU
yading@10 2493 e->search_for_quantizers = search_for_quantizers_twoloop_mips;
yading@10 2494 e->search_for_ms = search_for_ms_mips;
yading@10 2495 #endif /* HAVE_MIPSFPU */
yading@10 2496 }
yading@10 2497 #endif /* HAVE_INLINE_ASM */
yading@10 2498 }