yading@10: /* yading@10: * Copyright (c) 2012 yading@10: * MIPS Technologies, Inc., California. yading@10: * yading@10: * Redistribution and use in source and binary forms, with or without yading@10: * modification, are permitted provided that the following conditions yading@10: * are met: yading@10: * 1. Redistributions of source code must retain the above copyright yading@10: * notice, this list of conditions and the following disclaimer. yading@10: * 2. Redistributions in binary form must reproduce the above copyright yading@10: * notice, this list of conditions and the following disclaimer in the yading@10: * documentation and/or other materials provided with the distribution. yading@10: * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its yading@10: * contributors may be used to endorse or promote products derived from yading@10: * this software without specific prior written permission. yading@10: * yading@10: * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND yading@10: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE yading@10: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE yading@10: * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE yading@10: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL yading@10: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS yading@10: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) yading@10: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT yading@10: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY yading@10: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF yading@10: * SUCH DAMAGE. yading@10: * yading@10: * Author: Stanislav Ocovaj (socovaj@mips.com) yading@10: * Szabolcs Pal (sabolc@mips.com) yading@10: * yading@10: * AAC coefficients encoder optimized for MIPS floating-point architecture yading@10: * yading@10: * This file is part of FFmpeg. yading@10: * yading@10: * FFmpeg is free software; you can redistribute it and/or yading@10: * modify it under the terms of the GNU Lesser General Public yading@10: * License as published by the Free Software Foundation; either yading@10: * version 2.1 of the License, or (at your option) any later version. yading@10: * yading@10: * FFmpeg is distributed in the hope that it will be useful, yading@10: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@10: * Lesser General Public License for more details. yading@10: * yading@10: * You should have received a copy of the GNU Lesser General Public yading@10: * License along with FFmpeg; if not, write to the Free Software yading@10: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@10: */ yading@10: yading@10: /** yading@10: * @file yading@10: * Reference: libavcodec/aaccoder.c yading@10: */ yading@10: yading@10: #include "libavutil/libm.h" yading@10: yading@10: #include yading@10: #include "libavutil/mathematics.h" yading@10: #include "libavcodec/avcodec.h" yading@10: #include "libavcodec/put_bits.h" yading@10: #include "libavcodec/aac.h" yading@10: #include "libavcodec/aacenc.h" yading@10: #include "libavcodec/aactab.h" yading@10: yading@10: #if HAVE_INLINE_ASM yading@10: typedef struct BandCodingPath { yading@10: int prev_idx; yading@10: float cost; yading@10: int run; yading@10: } BandCodingPath; yading@10: yading@10: static const uint8_t run_value_bits_long[64] = { yading@10: 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, yading@10: 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, yading@10: 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, yading@10: 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15 yading@10: }; yading@10: yading@10: static const uint8_t run_value_bits_short[16] = { yading@10: 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9 yading@10: }; yading@10: yading@10: static const uint8_t *run_value_bits[2] = { yading@10: run_value_bits_long, run_value_bits_short yading@10: }; yading@10: yading@10: static const uint8_t uquad_sign_bits[81] = { yading@10: 0, 1, 1, 1, 2, 2, 1, 2, 2, yading@10: 1, 2, 2, 2, 3, 3, 2, 3, 3, yading@10: 1, 2, 2, 2, 3, 3, 2, 3, 3, yading@10: 1, 2, 2, 2, 3, 3, 2, 3, 3, yading@10: 2, 3, 3, 3, 4, 4, 3, 4, 4, yading@10: 2, 3, 3, 3, 4, 4, 3, 4, 4, yading@10: 1, 2, 2, 2, 3, 3, 2, 3, 3, yading@10: 2, 3, 3, 3, 4, 4, 3, 4, 4, yading@10: 2, 3, 3, 3, 4, 4, 3, 4, 4 yading@10: }; yading@10: yading@10: static const uint8_t upair7_sign_bits[64] = { yading@10: 0, 1, 1, 1, 1, 1, 1, 1, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, yading@10: }; yading@10: yading@10: static const uint8_t upair12_sign_bits[169] = { yading@10: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 yading@10: }; yading@10: yading@10: static const uint8_t esc_sign_bits[289] = { yading@10: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, yading@10: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 yading@10: }; yading@10: yading@10: static void abs_pow34_v(float *out, const float *in, const int size) { yading@10: #ifndef USE_REALLY_FULL_SEARCH yading@10: int i; yading@10: float a, b, c, d; yading@10: float ax, bx, cx, dx; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: a = fabsf(in[i ]); yading@10: b = fabsf(in[i+1]); yading@10: c = fabsf(in[i+2]); yading@10: d = fabsf(in[i+3]); yading@10: yading@10: ax = sqrtf(a); yading@10: bx = sqrtf(b); yading@10: cx = sqrtf(c); yading@10: dx = sqrtf(d); yading@10: yading@10: a = a * ax; yading@10: b = b * bx; yading@10: c = c * cx; yading@10: d = d * dx; yading@10: yading@10: out[i ] = sqrtf(a); yading@10: out[i+1] = sqrtf(b); yading@10: out[i+2] = sqrtf(c); yading@10: out[i+3] = sqrtf(d); yading@10: } yading@10: #endif /* USE_REALLY_FULL_SEARCH */ yading@10: } yading@10: yading@10: static float find_max_val(int group_len, int swb_size, const float *scaled) { yading@10: float maxval = 0.0f; yading@10: int w2, i; yading@10: for (w2 = 0; w2 < group_len; w2++) { yading@10: for (i = 0; i < swb_size; i++) { yading@10: maxval = FFMAX(maxval, scaled[w2*128+i]); yading@10: } yading@10: } yading@10: return maxval; yading@10: } yading@10: yading@10: static int find_min_book(float maxval, int sf) { yading@10: float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: float Q34 = sqrtf(Q * sqrtf(Q)); yading@10: int qmaxval, cb; yading@10: qmaxval = maxval * Q34 + 0.4054f; yading@10: if (qmaxval == 0) cb = 0; yading@10: else if (qmaxval == 1) cb = 1; yading@10: else if (qmaxval == 2) cb = 3; yading@10: else if (qmaxval <= 4) cb = 5; yading@10: else if (qmaxval <= 7) cb = 7; yading@10: else if (qmaxval <= 12) cb = 9; yading@10: else cb = 11; yading@10: return cb; yading@10: } yading@10: yading@10: /** yading@10: * Functions developed from template function and optimized for quantizing and encoding band yading@10: */ yading@10: static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; yading@10: yading@10: abs_pow34_v(s->scoefs, in, size); yading@10: scaled = s->scoefs; yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx; yading@10: int *in_int = (int *)&in[i]; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "slt %[qc1], $zero, %[qc1] \n\t" yading@10: "slt %[qc2], $zero, %[qc2] \n\t" yading@10: "slt %[qc3], $zero, %[qc3] \n\t" yading@10: "slt %[qc4], $zero, %[qc4] \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "srl $t0, $t0, 31 \n\t" yading@10: "srl $t1, $t1, 31 \n\t" yading@10: "srl $t2, $t2, 31 \n\t" yading@10: "srl $t3, $t3, 31 \n\t" yading@10: "subu $t4, $zero, %[qc1] \n\t" yading@10: "subu $t5, $zero, %[qc2] \n\t" yading@10: "subu $t6, $zero, %[qc3] \n\t" yading@10: "subu $t7, $zero, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t5, $t1 \n\t" yading@10: "movn %[qc3], $t6, $t2 \n\t" yading@10: "movn %[qc4], $t7, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", yading@10: "t4", "t5", "t6", "t7", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = qc1; yading@10: curidx *= 3; yading@10: curidx += qc2; yading@10: curidx *= 3; yading@10: curidx += qc3; yading@10: curidx *= 3; yading@10: curidx += qc4; yading@10: curidx += 40; yading@10: yading@10: put_bits(pb, p_bits[curidx], p_codes[curidx]); yading@10: } yading@10: } yading@10: yading@10: static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; yading@10: yading@10: abs_pow34_v(s->scoefs, in, size); yading@10: scaled = s->scoefs; yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, sign, count; yading@10: int *in_int = (int *)&in[i]; yading@10: uint8_t v_bits; yading@10: unsigned int v_codes; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 2 \n\t" yading@10: "ori %[sign], $zero, 0 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign], $t0, %[qc1] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign], $t0, %[qc2] \n\t" yading@10: "slt $t4, $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count], $zero, %[qc3] \n\t" yading@10: "sll $t0, %[sign], 1 \n\t" yading@10: "or $t0, $t0, $t2 \n\t" yading@10: "movn %[sign], $t0, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count], %[count], $t4 \n\t" yading@10: "addu %[count], %[count], $t1 \n\t" yading@10: "sll $t0, %[sign], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign], $t0, %[qc4] \n\t" yading@10: "addu %[count], %[count], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign]"=&r"(sign), [count]"=&r"(count) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = qc1; yading@10: curidx *= 3; yading@10: curidx += qc2; yading@10: curidx *= 3; yading@10: curidx += qc3; yading@10: curidx *= 3; yading@10: curidx += qc4; yading@10: yading@10: v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1)); yading@10: v_bits = p_bits[curidx] + count; yading@10: put_bits(pb, v_bits, v_codes); yading@10: } yading@10: } yading@10: yading@10: static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; yading@10: yading@10: abs_pow34_v(s->scoefs, in, size); yading@10: scaled = s->scoefs; yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2; yading@10: int *in_int = (int *)&in[i]; yading@10: uint8_t v_bits; yading@10: unsigned int v_codes; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 4 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "srl $t0, $t0, 31 \n\t" yading@10: "srl $t1, $t1, 31 \n\t" yading@10: "srl $t2, $t2, 31 \n\t" yading@10: "srl $t3, $t3, 31 \n\t" yading@10: "subu $t4, $zero, %[qc1] \n\t" yading@10: "subu $t5, $zero, %[qc2] \n\t" yading@10: "subu $t6, $zero, %[qc3] \n\t" yading@10: "subu $t7, $zero, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t5, $t1 \n\t" yading@10: "movn %[qc3], $t6, $t2 \n\t" yading@10: "movn %[qc4], $t7, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", yading@10: "t4", "t5", "t6", "t7", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 9 * qc1; yading@10: curidx += qc2 + 40; yading@10: yading@10: curidx2 = 9 * qc3; yading@10: curidx2 += qc4 + 40; yading@10: yading@10: v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]); yading@10: v_bits = p_bits[curidx] + p_bits[curidx2]; yading@10: put_bits(pb, v_bits, v_codes); yading@10: } yading@10: } yading@10: yading@10: static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1]; yading@10: uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; yading@10: yading@10: abs_pow34_v(s->scoefs, in, size); yading@10: scaled = s->scoefs; yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, sign1, count1, sign2, count2; yading@10: int *in_int = (int *)&in[i]; yading@10: uint8_t v_bits; yading@10: unsigned int v_codes; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 7 \n\t" yading@10: "ori %[sign1], $zero, 0 \n\t" yading@10: "ori %[sign2], $zero, 0 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign1], $t0, %[qc1] \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "movn %[sign2], $t2, %[qc3] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "sll $t0, %[sign1], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign1], $t0, %[qc2] \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign2], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign2], $t0, %[qc4] \n\t" yading@10: "slt %[count1], $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count2], $zero, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count1], %[count1], $t1 \n\t" yading@10: "addu %[count2], %[count2], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign1]"=&r"(sign1), [count1]"=&r"(count1), yading@10: [sign2]"=&r"(sign2), [count2]"=&r"(count2) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 8 * qc1; yading@10: curidx += qc2; yading@10: yading@10: v_codes = (p_codes[curidx] << count1) | sign1; yading@10: v_bits = p_bits[curidx] + count1; yading@10: put_bits(pb, v_bits, v_codes); yading@10: yading@10: curidx = 8 * qc3; yading@10: curidx += qc4; yading@10: yading@10: v_codes = (p_codes[curidx] << count2) | sign2; yading@10: v_bits = p_bits[curidx] + count2; yading@10: put_bits(pb, v_bits, v_codes); yading@10: } yading@10: } yading@10: yading@10: static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1]; yading@10: uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; yading@10: yading@10: abs_pow34_v(s->scoefs, in, size); yading@10: scaled = s->scoefs; yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, sign1, count1, sign2, count2; yading@10: int *in_int = (int *)&in[i]; yading@10: uint8_t v_bits; yading@10: unsigned int v_codes; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 12 \n\t" yading@10: "ori %[sign1], $zero, 0 \n\t" yading@10: "ori %[sign2], $zero, 0 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign1], $t0, %[qc1] \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "movn %[sign2], $t2, %[qc3] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "sll $t0, %[sign1], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign1], $t0, %[qc2] \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign2], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign2], $t0, %[qc4] \n\t" yading@10: "slt %[count1], $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count2], $zero, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count1], %[count1], $t1 \n\t" yading@10: "addu %[count2], %[count2], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign1]"=&r"(sign1), [count1]"=&r"(count1), yading@10: [sign2]"=&r"(sign2), [count2]"=&r"(count2) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 13 * qc1; yading@10: curidx += qc2; yading@10: yading@10: v_codes = (p_codes[curidx] << count1) | sign1; yading@10: v_bits = p_bits[curidx] + count1; yading@10: put_bits(pb, v_bits, v_codes); yading@10: yading@10: curidx = 13 * qc3; yading@10: curidx += qc4; yading@10: yading@10: v_codes = (p_codes[curidx] << count2) | sign2; yading@10: v_bits = p_bits[curidx] + count2; yading@10: put_bits(pb, v_bits, v_codes); yading@10: } yading@10: } yading@10: yading@10: static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1]; yading@10: uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; yading@10: float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: abs_pow34_v(s->scoefs, in, size); yading@10: scaled = s->scoefs; yading@10: yading@10: if (cb < 11) { yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2, sign1, count1, sign2, count2; yading@10: int *in_int = (int *)&in[i]; yading@10: uint8_t v_bits; yading@10: unsigned int v_codes; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 16 \n\t" yading@10: "ori %[sign1], $zero, 0 \n\t" yading@10: "ori %[sign2], $zero, 0 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign1], $t0, %[qc1] \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "movn %[sign2], $t2, %[qc3] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "sll $t0, %[sign1], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign1], $t0, %[qc2] \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign2], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign2], $t0, %[qc4] \n\t" yading@10: "slt %[count1], $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count2], $zero, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count1], %[count1], $t1 \n\t" yading@10: "addu %[count2], %[count2], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign1]"=&r"(sign1), [count1]"=&r"(count1), yading@10: [sign2]"=&r"(sign2), [count2]"=&r"(count2) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 17 * qc1; yading@10: curidx += qc2; yading@10: curidx2 = 17 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: v_codes = (p_codes[curidx] << count1) | sign1; yading@10: v_bits = p_bits[curidx] + count1; yading@10: put_bits(pb, v_bits, v_codes); yading@10: yading@10: v_codes = (p_codes[curidx2] << count2) | sign2; yading@10: v_bits = p_bits[curidx2] + count2; yading@10: put_bits(pb, v_bits, v_codes); yading@10: } yading@10: } else { yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2, sign1, count1, sign2, count2; yading@10: int *in_int = (int *)&in[i]; yading@10: uint8_t v_bits; yading@10: unsigned int v_codes; yading@10: int c1, c2, c3, c4; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 16 \n\t" yading@10: "ori %[sign1], $zero, 0 \n\t" yading@10: "ori %[sign2], $zero, 0 \n\t" yading@10: "shll_s.w %[c1], %[qc1], 18 \n\t" yading@10: "shll_s.w %[c2], %[qc2], 18 \n\t" yading@10: "shll_s.w %[c3], %[qc3], 18 \n\t" yading@10: "shll_s.w %[c4], %[qc4], 18 \n\t" yading@10: "srl %[c1], %[c1], 18 \n\t" yading@10: "srl %[c2], %[c2], 18 \n\t" yading@10: "srl %[c3], %[c3], 18 \n\t" yading@10: "srl %[c4], %[c4], 18 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign1], $t0, %[qc1] \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "movn %[sign2], $t2, %[qc3] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "sll $t0, %[sign1], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign1], $t0, %[qc2] \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign2], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign2], $t0, %[qc4] \n\t" yading@10: "slt %[count1], $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count2], $zero, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count1], %[count1], $t1 \n\t" yading@10: "addu %[count2], %[count2], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign1]"=&r"(sign1), [count1]"=&r"(count1), yading@10: [sign2]"=&r"(sign2), [count2]"=&r"(count2), yading@10: [c1]"=&r"(c1), [c2]"=&r"(c2), yading@10: [c3]"=&r"(c3), [c4]"=&r"(c4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 17 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 17 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: v_codes = (p_codes[curidx] << count1) | sign1; yading@10: v_bits = p_bits[curidx] + count1; yading@10: put_bits(pb, v_bits, v_codes); yading@10: yading@10: if (p_vectors[curidx*2 ] == 64.0f) { yading@10: int len = av_log2(c1); yading@10: v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1)); yading@10: put_bits(pb, len * 2 - 3, v_codes); yading@10: } yading@10: if (p_vectors[curidx*2+1] == 64.0f) { yading@10: int len = av_log2(c2); yading@10: v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1)); yading@10: put_bits(pb, len*2-3, v_codes); yading@10: } yading@10: yading@10: v_codes = (p_codes[curidx2] << count2) | sign2; yading@10: v_bits = p_bits[curidx2] + count2; yading@10: put_bits(pb, v_bits, v_codes); yading@10: yading@10: if (p_vectors[curidx2*2 ] == 64.0f) { yading@10: int len = av_log2(c3); yading@10: v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1)); yading@10: put_bits(pb, len* 2 - 3, v_codes); yading@10: } yading@10: if (p_vectors[curidx2*2+1] == 64.0f) { yading@10: int len = av_log2(c4); yading@10: v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1)); yading@10: put_bits(pb, len * 2 - 3, v_codes); yading@10: } yading@10: } yading@10: } yading@10: } yading@10: yading@10: static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) = { yading@10: NULL, yading@10: quantize_and_encode_band_cost_SQUAD_mips, yading@10: quantize_and_encode_band_cost_SQUAD_mips, yading@10: quantize_and_encode_band_cost_UQUAD_mips, yading@10: quantize_and_encode_band_cost_UQUAD_mips, yading@10: quantize_and_encode_band_cost_SPAIR_mips, yading@10: quantize_and_encode_band_cost_SPAIR_mips, yading@10: quantize_and_encode_band_cost_UPAIR7_mips, yading@10: quantize_and_encode_band_cost_UPAIR7_mips, yading@10: quantize_and_encode_band_cost_UPAIR12_mips, yading@10: quantize_and_encode_band_cost_UPAIR12_mips, yading@10: quantize_and_encode_band_cost_ESC_mips, yading@10: }; yading@10: yading@10: #define quantize_and_encode_band_cost( \ yading@10: s, pb, in, scaled, size, scale_idx, cb, \ yading@10: lambda, uplim, bits) \ yading@10: quantize_and_encode_band_cost_arr[cb]( \ yading@10: s, pb, in, scaled, size, scale_idx, cb, \ yading@10: lambda, uplim, bits) yading@10: yading@10: static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb, yading@10: const float *in, int size, int scale_idx, yading@10: int cb, const float lambda) yading@10: { yading@10: quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda, yading@10: INFINITY, NULL); yading@10: } yading@10: yading@10: /** yading@10: * Functions developed from template function and optimized for getting the number of bits yading@10: */ yading@10: static float get_band_numbits_ZERO_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: return 0; yading@10: } yading@10: yading@10: static float get_band_numbits_SQUAD_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx; yading@10: int *in_int = (int *)&in[i]; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "slt %[qc1], $zero, %[qc1] \n\t" yading@10: "slt %[qc2], $zero, %[qc2] \n\t" yading@10: "slt %[qc3], $zero, %[qc3] \n\t" yading@10: "slt %[qc4], $zero, %[qc4] \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "srl $t0, $t0, 31 \n\t" yading@10: "srl $t1, $t1, 31 \n\t" yading@10: "srl $t2, $t2, 31 \n\t" yading@10: "srl $t3, $t3, 31 \n\t" yading@10: "subu $t4, $zero, %[qc1] \n\t" yading@10: "subu $t5, $zero, %[qc2] \n\t" yading@10: "subu $t6, $zero, %[qc3] \n\t" yading@10: "subu $t7, $zero, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t5, $t1 \n\t" yading@10: "movn %[qc3], $t6, $t2 \n\t" yading@10: "movn %[qc4], $t7, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", yading@10: "t4", "t5", "t6", "t7", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = qc1; yading@10: curidx *= 3; yading@10: curidx += qc2; yading@10: curidx *= 3; yading@10: curidx += qc3; yading@10: curidx *= 3; yading@10: curidx += qc4; yading@10: curidx += 40; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: } yading@10: return curbits; yading@10: } yading@10: yading@10: static float get_band_numbits_UQUAD_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int curbits = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 2 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : yading@10: : "t0", "t1", "t2", "t3", "t4" yading@10: ); yading@10: yading@10: curidx = qc1; yading@10: curidx *= 3; yading@10: curidx += qc2; yading@10: curidx *= 3; yading@10: curidx += qc3; yading@10: curidx *= 3; yading@10: curidx += qc4; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += uquad_sign_bits[curidx]; yading@10: } yading@10: return curbits; yading@10: } yading@10: yading@10: static float get_band_numbits_SPAIR_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2; yading@10: int *in_int = (int *)&in[i]; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 4 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "srl $t0, $t0, 31 \n\t" yading@10: "srl $t1, $t1, 31 \n\t" yading@10: "srl $t2, $t2, 31 \n\t" yading@10: "srl $t3, $t3, 31 \n\t" yading@10: "subu $t4, $zero, %[qc1] \n\t" yading@10: "subu $t5, $zero, %[qc2] \n\t" yading@10: "subu $t6, $zero, %[qc3] \n\t" yading@10: "subu $t7, $zero, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t5, $t1 \n\t" yading@10: "movn %[qc3], $t6, $t2 \n\t" yading@10: "movn %[qc4], $t7, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", yading@10: "t4", "t5", "t6", "t7", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 9 * qc1; yading@10: curidx += qc2 + 40; yading@10: yading@10: curidx2 = 9 * qc3; yading@10: curidx2 += qc4 + 40; yading@10: yading@10: curbits += p_bits[curidx] + p_bits[curidx2]; yading@10: } yading@10: return curbits; yading@10: } yading@10: yading@10: static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 7 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : yading@10: : "t0", "t1", "t2", "t3", "t4" yading@10: ); yading@10: yading@10: curidx = 8 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 8 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: curbits += p_bits[curidx] + yading@10: upair7_sign_bits[curidx] + yading@10: p_bits[curidx2] + yading@10: upair7_sign_bits[curidx2]; yading@10: } yading@10: return curbits; yading@10: } yading@10: yading@10: static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 12 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : yading@10: : "t0", "t1", "t2", "t3", "t4" yading@10: ); yading@10: yading@10: curidx = 13 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 13 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: curbits += p_bits[curidx] + yading@10: p_bits[curidx2] + yading@10: upair12_sign_bits[curidx] + yading@10: upair12_sign_bits[curidx2]; yading@10: } yading@10: return curbits; yading@10: } yading@10: yading@10: static float get_band_numbits_ESC_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: int i; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: int curidx, curidx2; yading@10: int cond0, cond1, cond2, cond3; yading@10: int c1, c2, c3, c4; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 15 \n\t" yading@10: "ori $t5, $zero, 16 \n\t" yading@10: "shll_s.w %[c1], %[qc1], 18 \n\t" yading@10: "shll_s.w %[c2], %[qc2], 18 \n\t" yading@10: "shll_s.w %[c3], %[qc3], 18 \n\t" yading@10: "shll_s.w %[c4], %[qc4], 18 \n\t" yading@10: "srl %[c1], %[c1], 18 \n\t" yading@10: "srl %[c2], %[c2], 18 \n\t" yading@10: "srl %[c3], %[c3], 18 \n\t" yading@10: "srl %[c4], %[c4], 18 \n\t" yading@10: "slt %[cond0], $t4, %[qc1] \n\t" yading@10: "slt %[cond1], $t4, %[qc2] \n\t" yading@10: "slt %[cond2], $t4, %[qc3] \n\t" yading@10: "slt %[cond3], $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t5, %[cond0] \n\t" yading@10: "movn %[qc2], $t5, %[cond1] \n\t" yading@10: "movn %[qc3], $t5, %[cond2] \n\t" yading@10: "movn %[qc4], $t5, %[cond3] \n\t" yading@10: "ori $t5, $zero, 31 \n\t" yading@10: "clz %[c1], %[c1] \n\t" yading@10: "clz %[c2], %[c2] \n\t" yading@10: "clz %[c3], %[c3] \n\t" yading@10: "clz %[c4], %[c4] \n\t" yading@10: "subu %[c1], $t5, %[c1] \n\t" yading@10: "subu %[c2], $t5, %[c2] \n\t" yading@10: "subu %[c3], $t5, %[c3] \n\t" yading@10: "subu %[c4], $t5, %[c4] \n\t" yading@10: "sll %[c1], %[c1], 1 \n\t" yading@10: "sll %[c2], %[c2], 1 \n\t" yading@10: "sll %[c3], %[c3], 1 \n\t" yading@10: "sll %[c4], %[c4], 1 \n\t" yading@10: "addiu %[c1], %[c1], -3 \n\t" yading@10: "addiu %[c2], %[c2], -3 \n\t" yading@10: "addiu %[c3], %[c3], -3 \n\t" yading@10: "addiu %[c4], %[c4], -3 \n\t" yading@10: "subu %[cond0], $zero, %[cond0] \n\t" yading@10: "subu %[cond1], $zero, %[cond1] \n\t" yading@10: "subu %[cond2], $zero, %[cond2] \n\t" yading@10: "subu %[cond3], $zero, %[cond3] \n\t" yading@10: "and %[c1], %[c1], %[cond0] \n\t" yading@10: "and %[c2], %[c2], %[cond1] \n\t" yading@10: "and %[c3], %[c3], %[cond2] \n\t" yading@10: "and %[c4], %[c4], %[cond3] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [cond0]"=&r"(cond0), [cond1]"=&r"(cond1), yading@10: [cond2]"=&r"(cond2), [cond3]"=&r"(cond3), yading@10: [c1]"=&r"(c1), [c2]"=&r"(c2), yading@10: [c3]"=&r"(c3), [c4]"=&r"(c4) yading@10: : yading@10: : "t4", "t5" yading@10: ); yading@10: yading@10: curidx = 17 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 17 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += esc_sign_bits[curidx]; yading@10: curbits += p_bits[curidx2]; yading@10: curbits += esc_sign_bits[curidx2]; yading@10: yading@10: curbits += c1; yading@10: curbits += c2; yading@10: curbits += c3; yading@10: curbits += c4; yading@10: } yading@10: return curbits; yading@10: } yading@10: yading@10: static float (*const get_band_numbits_arr[])(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) = { yading@10: get_band_numbits_ZERO_mips, yading@10: get_band_numbits_SQUAD_mips, yading@10: get_band_numbits_SQUAD_mips, yading@10: get_band_numbits_UQUAD_mips, yading@10: get_band_numbits_UQUAD_mips, yading@10: get_band_numbits_SPAIR_mips, yading@10: get_band_numbits_SPAIR_mips, yading@10: get_band_numbits_UPAIR7_mips, yading@10: get_band_numbits_UPAIR7_mips, yading@10: get_band_numbits_UPAIR12_mips, yading@10: get_band_numbits_UPAIR12_mips, yading@10: get_band_numbits_ESC_mips, yading@10: }; yading@10: yading@10: #define get_band_numbits( \ yading@10: s, pb, in, scaled, size, scale_idx, cb, \ yading@10: lambda, uplim, bits) \ yading@10: get_band_numbits_arr[cb]( \ yading@10: s, pb, in, scaled, size, scale_idx, cb, \ yading@10: lambda, uplim, bits) yading@10: yading@10: static float quantize_band_cost_bits(struct AACEncContext *s, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits); yading@10: } yading@10: yading@10: /** yading@10: * Functions developed from template function and optimized for getting the band cost yading@10: */ yading@10: #if HAVE_MIPSFPU yading@10: static float get_band_cost_ZERO_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: int i; yading@10: float cost = 0; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: cost += in[i ] * in[i ]; yading@10: cost += in[i+1] * in[i+1]; yading@10: cost += in[i+2] * in[i+2]; yading@10: cost += in[i+3] * in[i+3]; yading@10: } yading@10: if (bits) yading@10: *bits = 0; yading@10: return cost * lambda; yading@10: } yading@10: yading@10: static float get_band_cost_SQUAD_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; yading@10: int i; yading@10: float cost = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: const float *vec; yading@10: int curidx; yading@10: int *in_int = (int *)&in[i]; yading@10: float *in_pos = (float *)&in[i]; yading@10: float di0, di1, di2, di3; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "slt %[qc1], $zero, %[qc1] \n\t" yading@10: "slt %[qc2], $zero, %[qc2] \n\t" yading@10: "slt %[qc3], $zero, %[qc3] \n\t" yading@10: "slt %[qc4], $zero, %[qc4] \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "srl $t0, $t0, 31 \n\t" yading@10: "srl $t1, $t1, 31 \n\t" yading@10: "srl $t2, $t2, 31 \n\t" yading@10: "srl $t3, $t3, 31 \n\t" yading@10: "subu $t4, $zero, %[qc1] \n\t" yading@10: "subu $t5, $zero, %[qc2] \n\t" yading@10: "subu $t6, $zero, %[qc3] \n\t" yading@10: "subu $t7, $zero, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t5, $t1 \n\t" yading@10: "movn %[qc3], $t6, $t2 \n\t" yading@10: "movn %[qc4], $t7, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", yading@10: "t4", "t5", "t6", "t7", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = qc1; yading@10: curidx *= 3; yading@10: curidx += qc2; yading@10: curidx *= 3; yading@10: curidx += qc3; yading@10: curidx *= 3; yading@10: curidx += qc4; yading@10: curidx += 40; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: vec = &p_codes[curidx*4]; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "lwc1 $f0, 0(%[in_pos]) \n\t" yading@10: "lwc1 $f1, 0(%[vec]) \n\t" yading@10: "lwc1 $f2, 4(%[in_pos]) \n\t" yading@10: "lwc1 $f3, 4(%[vec]) \n\t" yading@10: "lwc1 $f4, 8(%[in_pos]) \n\t" yading@10: "lwc1 $f5, 8(%[vec]) \n\t" yading@10: "lwc1 $f6, 12(%[in_pos]) \n\t" yading@10: "lwc1 $f7, 12(%[vec]) \n\t" yading@10: "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t" yading@10: "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t" yading@10: "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t" yading@10: "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [di0]"=&f"(di0), [di1]"=&f"(di1), yading@10: [di2]"=&f"(di2), [di3]"=&f"(di3) yading@10: : [in_pos]"r"(in_pos), [vec]"r"(vec), yading@10: [IQ]"f"(IQ) yading@10: : "$f0", "$f1", "$f2", "$f3", yading@10: "$f4", "$f5", "$f6", "$f7", yading@10: "memory" yading@10: ); yading@10: yading@10: cost += di0 * di0 + di1 * di1 yading@10: + di2 * di2 + di3 * di3; yading@10: } yading@10: yading@10: if (bits) yading@10: *bits = curbits; yading@10: return cost * lambda + curbits; yading@10: } yading@10: yading@10: static float get_band_cost_UQUAD_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; yading@10: int i; yading@10: float cost = 0; yading@10: int curbits = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: yading@10: uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; yading@10: float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: const float *vec; yading@10: int curidx; yading@10: float *in_pos = (float *)&in[i]; yading@10: float di0, di1, di2, di3; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 2 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : yading@10: : "t0", "t1", "t2", "t3", "t4" yading@10: ); yading@10: yading@10: curidx = qc1; yading@10: curidx *= 3; yading@10: curidx += qc2; yading@10: curidx *= 3; yading@10: curidx += qc3; yading@10: curidx *= 3; yading@10: curidx += qc4; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += uquad_sign_bits[curidx]; yading@10: vec = &p_codes[curidx*4]; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "lwc1 %[di0], 0(%[in_pos]) \n\t" yading@10: "lwc1 %[di1], 4(%[in_pos]) \n\t" yading@10: "lwc1 %[di2], 8(%[in_pos]) \n\t" yading@10: "lwc1 %[di3], 12(%[in_pos]) \n\t" yading@10: "abs.s %[di0], %[di0] \n\t" yading@10: "abs.s %[di1], %[di1] \n\t" yading@10: "abs.s %[di2], %[di2] \n\t" yading@10: "abs.s %[di3], %[di3] \n\t" yading@10: "lwc1 $f0, 0(%[vec]) \n\t" yading@10: "lwc1 $f1, 4(%[vec]) \n\t" yading@10: "lwc1 $f2, 8(%[vec]) \n\t" yading@10: "lwc1 $f3, 12(%[vec]) \n\t" yading@10: "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t" yading@10: "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t" yading@10: "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t" yading@10: "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [di0]"=&f"(di0), [di1]"=&f"(di1), yading@10: [di2]"=&f"(di2), [di3]"=&f"(di3) yading@10: : [in_pos]"r"(in_pos), [vec]"r"(vec), yading@10: [IQ]"f"(IQ) yading@10: : "$f0", "$f1", "$f2", "$f3", yading@10: "memory" yading@10: ); yading@10: yading@10: cost += di0 * di0 + di1 * di1 yading@10: + di2 * di2 + di3 * di3; yading@10: } yading@10: yading@10: if (bits) yading@10: *bits = curbits; yading@10: return cost * lambda + curbits; yading@10: } yading@10: yading@10: static float get_band_cost_SPAIR_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; yading@10: int i; yading@10: float cost = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: const float *vec, *vec2; yading@10: int curidx, curidx2; yading@10: int *in_int = (int *)&in[i]; yading@10: float *in_pos = (float *)&in[i]; yading@10: float di0, di1, di2, di3; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 4 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "srl $t0, $t0, 31 \n\t" yading@10: "srl $t1, $t1, 31 \n\t" yading@10: "srl $t2, $t2, 31 \n\t" yading@10: "srl $t3, $t3, 31 \n\t" yading@10: "subu $t4, $zero, %[qc1] \n\t" yading@10: "subu $t5, $zero, %[qc2] \n\t" yading@10: "subu $t6, $zero, %[qc3] \n\t" yading@10: "subu $t7, $zero, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t5, $t1 \n\t" yading@10: "movn %[qc3], $t6, $t2 \n\t" yading@10: "movn %[qc4], $t7, $t3 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", yading@10: "t4", "t5", "t6", "t7", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 9 * qc1; yading@10: curidx += qc2 + 40; yading@10: yading@10: curidx2 = 9 * qc3; yading@10: curidx2 += qc4 + 40; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += p_bits[curidx2]; yading@10: yading@10: vec = &p_codes[curidx*2]; yading@10: vec2 = &p_codes[curidx2*2]; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "lwc1 $f0, 0(%[in_pos]) \n\t" yading@10: "lwc1 $f1, 0(%[vec]) \n\t" yading@10: "lwc1 $f2, 4(%[in_pos]) \n\t" yading@10: "lwc1 $f3, 4(%[vec]) \n\t" yading@10: "lwc1 $f4, 8(%[in_pos]) \n\t" yading@10: "lwc1 $f5, 0(%[vec2]) \n\t" yading@10: "lwc1 $f6, 12(%[in_pos]) \n\t" yading@10: "lwc1 $f7, 4(%[vec2]) \n\t" yading@10: "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t" yading@10: "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t" yading@10: "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t" yading@10: "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [di0]"=&f"(di0), [di1]"=&f"(di1), yading@10: [di2]"=&f"(di2), [di3]"=&f"(di3) yading@10: : [in_pos]"r"(in_pos), [vec]"r"(vec), yading@10: [vec2]"r"(vec2), [IQ]"f"(IQ) yading@10: : "$f0", "$f1", "$f2", "$f3", yading@10: "$f4", "$f5", "$f6", "$f7", yading@10: "memory" yading@10: ); yading@10: yading@10: cost += di0 * di0 + di1 * di1 yading@10: + di2 * di2 + di3 * di3; yading@10: } yading@10: yading@10: if (bits) yading@10: *bits = curbits; yading@10: return cost * lambda + curbits; yading@10: } yading@10: yading@10: static float get_band_cost_UPAIR7_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; yading@10: int i; yading@10: float cost = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: const float *vec, *vec2; yading@10: int curidx, curidx2, sign1, count1, sign2, count2; yading@10: int *in_int = (int *)&in[i]; yading@10: float *in_pos = (float *)&in[i]; yading@10: float di0, di1, di2, di3; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 7 \n\t" yading@10: "ori %[sign1], $zero, 0 \n\t" yading@10: "ori %[sign2], $zero, 0 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign1], $t0, %[qc1] \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "movn %[sign2], $t2, %[qc3] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "sll $t0, %[sign1], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign1], $t0, %[qc2] \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign2], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign2], $t0, %[qc4] \n\t" yading@10: "slt %[count1], $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count2], $zero, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count1], %[count1], $t1 \n\t" yading@10: "addu %[count2], %[count2], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign1]"=&r"(sign1), [count1]"=&r"(count1), yading@10: [sign2]"=&r"(sign2), [count2]"=&r"(count2) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 8 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 8 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += upair7_sign_bits[curidx]; yading@10: vec = &p_codes[curidx*2]; yading@10: yading@10: curbits += p_bits[curidx2]; yading@10: curbits += upair7_sign_bits[curidx2]; yading@10: vec2 = &p_codes[curidx2*2]; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "lwc1 %[di0], 0(%[in_pos]) \n\t" yading@10: "lwc1 %[di1], 4(%[in_pos]) \n\t" yading@10: "lwc1 %[di2], 8(%[in_pos]) \n\t" yading@10: "lwc1 %[di3], 12(%[in_pos]) \n\t" yading@10: "abs.s %[di0], %[di0] \n\t" yading@10: "abs.s %[di1], %[di1] \n\t" yading@10: "abs.s %[di2], %[di2] \n\t" yading@10: "abs.s %[di3], %[di3] \n\t" yading@10: "lwc1 $f0, 0(%[vec]) \n\t" yading@10: "lwc1 $f1, 4(%[vec]) \n\t" yading@10: "lwc1 $f2, 0(%[vec2]) \n\t" yading@10: "lwc1 $f3, 4(%[vec2]) \n\t" yading@10: "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t" yading@10: "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t" yading@10: "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t" yading@10: "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [di0]"=&f"(di0), [di1]"=&f"(di1), yading@10: [di2]"=&f"(di2), [di3]"=&f"(di3) yading@10: : [in_pos]"r"(in_pos), [vec]"r"(vec), yading@10: [vec2]"r"(vec2), [IQ]"f"(IQ) yading@10: : "$f0", "$f1", "$f2", "$f3", yading@10: "memory" yading@10: ); yading@10: yading@10: cost += di0 * di0 + di1 * di1 yading@10: + di2 * di2 + di3 * di3; yading@10: } yading@10: yading@10: if (bits) yading@10: *bits = curbits; yading@10: return cost * lambda + curbits; yading@10: } yading@10: yading@10: static float get_band_cost_UPAIR12_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; yading@10: int i; yading@10: float cost = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; yading@10: float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: const float *vec, *vec2; yading@10: int curidx, curidx2; yading@10: int sign1, count1, sign2, count2; yading@10: int *in_int = (int *)&in[i]; yading@10: float *in_pos = (float *)&in[i]; yading@10: float di0, di1, di2, di3; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 12 \n\t" yading@10: "ori %[sign1], $zero, 0 \n\t" yading@10: "ori %[sign2], $zero, 0 \n\t" yading@10: "slt $t0, $t4, %[qc1] \n\t" yading@10: "slt $t1, $t4, %[qc2] \n\t" yading@10: "slt $t2, $t4, %[qc3] \n\t" yading@10: "slt $t3, $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t4, $t0 \n\t" yading@10: "movn %[qc2], $t4, $t1 \n\t" yading@10: "movn %[qc3], $t4, $t2 \n\t" yading@10: "movn %[qc4], $t4, $t3 \n\t" yading@10: "lw $t0, 0(%[in_int]) \n\t" yading@10: "lw $t1, 4(%[in_int]) \n\t" yading@10: "lw $t2, 8(%[in_int]) \n\t" yading@10: "lw $t3, 12(%[in_int]) \n\t" yading@10: "slt $t0, $t0, $zero \n\t" yading@10: "movn %[sign1], $t0, %[qc1] \n\t" yading@10: "slt $t2, $t2, $zero \n\t" yading@10: "movn %[sign2], $t2, %[qc3] \n\t" yading@10: "slt $t1, $t1, $zero \n\t" yading@10: "sll $t0, %[sign1], 1 \n\t" yading@10: "or $t0, $t0, $t1 \n\t" yading@10: "movn %[sign1], $t0, %[qc2] \n\t" yading@10: "slt $t3, $t3, $zero \n\t" yading@10: "sll $t0, %[sign2], 1 \n\t" yading@10: "or $t0, $t0, $t3 \n\t" yading@10: "movn %[sign2], $t0, %[qc4] \n\t" yading@10: "slt %[count1], $zero, %[qc1] \n\t" yading@10: "slt $t1, $zero, %[qc2] \n\t" yading@10: "slt %[count2], $zero, %[qc3] \n\t" yading@10: "slt $t2, $zero, %[qc4] \n\t" yading@10: "addu %[count1], %[count1], $t1 \n\t" yading@10: "addu %[count2], %[count2], $t2 \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [sign1]"=&r"(sign1), [count1]"=&r"(count1), yading@10: [sign2]"=&r"(sign2), [count2]"=&r"(count2) yading@10: : [in_int]"r"(in_int) yading@10: : "t0", "t1", "t2", "t3", "t4", yading@10: "memory" yading@10: ); yading@10: yading@10: curidx = 13 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 13 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += p_bits[curidx2]; yading@10: curbits += upair12_sign_bits[curidx]; yading@10: curbits += upair12_sign_bits[curidx2]; yading@10: vec = &p_codes[curidx*2]; yading@10: vec2 = &p_codes[curidx2*2]; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "lwc1 %[di0], 0(%[in_pos]) \n\t" yading@10: "lwc1 %[di1], 4(%[in_pos]) \n\t" yading@10: "lwc1 %[di2], 8(%[in_pos]) \n\t" yading@10: "lwc1 %[di3], 12(%[in_pos]) \n\t" yading@10: "abs.s %[di0], %[di0] \n\t" yading@10: "abs.s %[di1], %[di1] \n\t" yading@10: "abs.s %[di2], %[di2] \n\t" yading@10: "abs.s %[di3], %[di3] \n\t" yading@10: "lwc1 $f0, 0(%[vec]) \n\t" yading@10: "lwc1 $f1, 4(%[vec]) \n\t" yading@10: "lwc1 $f2, 0(%[vec2]) \n\t" yading@10: "lwc1 $f3, 4(%[vec2]) \n\t" yading@10: "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t" yading@10: "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t" yading@10: "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t" yading@10: "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [di0]"=&f"(di0), [di1]"=&f"(di1), yading@10: [di2]"=&f"(di2), [di3]"=&f"(di3) yading@10: : [in_pos]"r"(in_pos), [vec]"r"(vec), yading@10: [vec2]"r"(vec2), [IQ]"f"(IQ) yading@10: : "$f0", "$f1", "$f2", "$f3", yading@10: "memory" yading@10: ); yading@10: yading@10: cost += di0 * di0 + di1 * di1 yading@10: + di2 * di2 + di3 * di3; yading@10: } yading@10: yading@10: if (bits) yading@10: *bits = curbits; yading@10: return cost * lambda + curbits; yading@10: } yading@10: yading@10: static float get_band_cost_ESC_mips(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; yading@10: const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; yading@10: const float CLIPPED_ESCAPE = 165140.0f * IQ; yading@10: int i; yading@10: float cost = 0; yading@10: int qc1, qc2, qc3, qc4; yading@10: int curbits = 0; yading@10: yading@10: uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; yading@10: float *p_codes = (float* )ff_aac_codebook_vectors[cb-1]; yading@10: yading@10: for (i = 0; i < size; i += 4) { yading@10: const float *vec, *vec2; yading@10: int curidx, curidx2; yading@10: float t1, t2, t3, t4; yading@10: float di1, di2, di3, di4; yading@10: int cond0, cond1, cond2, cond3; yading@10: int c1, c2, c3, c4; yading@10: yading@10: qc1 = scaled[i ] * Q34 + 0.4054f; yading@10: qc2 = scaled[i+1] * Q34 + 0.4054f; yading@10: qc3 = scaled[i+2] * Q34 + 0.4054f; yading@10: qc4 = scaled[i+3] * Q34 + 0.4054f; yading@10: yading@10: __asm__ volatile ( yading@10: ".set push \n\t" yading@10: ".set noreorder \n\t" yading@10: yading@10: "ori $t4, $zero, 15 \n\t" yading@10: "ori $t5, $zero, 16 \n\t" yading@10: "shll_s.w %[c1], %[qc1], 18 \n\t" yading@10: "shll_s.w %[c2], %[qc2], 18 \n\t" yading@10: "shll_s.w %[c3], %[qc3], 18 \n\t" yading@10: "shll_s.w %[c4], %[qc4], 18 \n\t" yading@10: "srl %[c1], %[c1], 18 \n\t" yading@10: "srl %[c2], %[c2], 18 \n\t" yading@10: "srl %[c3], %[c3], 18 \n\t" yading@10: "srl %[c4], %[c4], 18 \n\t" yading@10: "slt %[cond0], $t4, %[qc1] \n\t" yading@10: "slt %[cond1], $t4, %[qc2] \n\t" yading@10: "slt %[cond2], $t4, %[qc3] \n\t" yading@10: "slt %[cond3], $t4, %[qc4] \n\t" yading@10: "movn %[qc1], $t5, %[cond0] \n\t" yading@10: "movn %[qc2], $t5, %[cond1] \n\t" yading@10: "movn %[qc3], $t5, %[cond2] \n\t" yading@10: "movn %[qc4], $t5, %[cond3] \n\t" yading@10: yading@10: ".set pop \n\t" yading@10: yading@10: : [qc1]"+r"(qc1), [qc2]"+r"(qc2), yading@10: [qc3]"+r"(qc3), [qc4]"+r"(qc4), yading@10: [cond0]"=&r"(cond0), [cond1]"=&r"(cond1), yading@10: [cond2]"=&r"(cond2), [cond3]"=&r"(cond3), yading@10: [c1]"=&r"(c1), [c2]"=&r"(c2), yading@10: [c3]"=&r"(c3), [c4]"=&r"(c4) yading@10: : yading@10: : "t4", "t5" yading@10: ); yading@10: yading@10: curidx = 17 * qc1; yading@10: curidx += qc2; yading@10: yading@10: curidx2 = 17 * qc3; yading@10: curidx2 += qc4; yading@10: yading@10: curbits += p_bits[curidx]; yading@10: curbits += esc_sign_bits[curidx]; yading@10: vec = &p_codes[curidx*2]; yading@10: yading@10: curbits += p_bits[curidx2]; yading@10: curbits += esc_sign_bits[curidx2]; yading@10: vec2 = &p_codes[curidx2*2]; yading@10: yading@10: curbits += (av_log2(c1) * 2 - 3) & (-cond0); yading@10: curbits += (av_log2(c2) * 2 - 3) & (-cond1); yading@10: curbits += (av_log2(c3) * 2 - 3) & (-cond2); yading@10: curbits += (av_log2(c4) * 2 - 3) & (-cond3); yading@10: yading@10: t1 = fabsf(in[i ]); yading@10: t2 = fabsf(in[i+1]); yading@10: t3 = fabsf(in[i+2]); yading@10: t4 = fabsf(in[i+3]); yading@10: yading@10: if (cond0) { yading@10: if (t1 >= CLIPPED_ESCAPE) { yading@10: di1 = t1 - CLIPPED_ESCAPE; yading@10: } else { yading@10: di1 = t1 - c1 * cbrtf(c1) * IQ; yading@10: } yading@10: } else yading@10: di1 = t1 - vec[0] * IQ; yading@10: yading@10: if (cond1) { yading@10: if (t2 >= CLIPPED_ESCAPE) { yading@10: di2 = t2 - CLIPPED_ESCAPE; yading@10: } else { yading@10: di2 = t2 - c2 * cbrtf(c2) * IQ; yading@10: } yading@10: } else yading@10: di2 = t2 - vec[1] * IQ; yading@10: yading@10: if (cond2) { yading@10: if (t3 >= CLIPPED_ESCAPE) { yading@10: di3 = t3 - CLIPPED_ESCAPE; yading@10: } else { yading@10: di3 = t3 - c3 * cbrtf(c3) * IQ; yading@10: } yading@10: } else yading@10: di3 = t3 - vec2[0] * IQ; yading@10: yading@10: if (cond3) { yading@10: if (t4 >= CLIPPED_ESCAPE) { yading@10: di4 = t4 - CLIPPED_ESCAPE; yading@10: } else { yading@10: di4 = t4 - c4 * cbrtf(c4) * IQ; yading@10: } yading@10: } else yading@10: di4 = t4 - vec2[1]*IQ; yading@10: yading@10: cost += di1 * di1 + di2 * di2 yading@10: + di3 * di3 + di4 * di4; yading@10: } yading@10: yading@10: if (bits) yading@10: *bits = curbits; yading@10: return cost * lambda + curbits; yading@10: } yading@10: yading@10: static float (*const get_band_cost_arr[])(struct AACEncContext *s, yading@10: PutBitContext *pb, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) = { yading@10: get_band_cost_ZERO_mips, yading@10: get_band_cost_SQUAD_mips, yading@10: get_band_cost_SQUAD_mips, yading@10: get_band_cost_UQUAD_mips, yading@10: get_band_cost_UQUAD_mips, yading@10: get_band_cost_SPAIR_mips, yading@10: get_band_cost_SPAIR_mips, yading@10: get_band_cost_UPAIR7_mips, yading@10: get_band_cost_UPAIR7_mips, yading@10: get_band_cost_UPAIR12_mips, yading@10: get_band_cost_UPAIR12_mips, yading@10: get_band_cost_ESC_mips, yading@10: }; yading@10: yading@10: #define get_band_cost( \ yading@10: s, pb, in, scaled, size, scale_idx, cb, \ yading@10: lambda, uplim, bits) \ yading@10: get_band_cost_arr[cb]( \ yading@10: s, pb, in, scaled, size, scale_idx, cb, \ yading@10: lambda, uplim, bits) yading@10: yading@10: static float quantize_band_cost(struct AACEncContext *s, const float *in, yading@10: const float *scaled, int size, int scale_idx, yading@10: int cb, const float lambda, const float uplim, yading@10: int *bits) yading@10: { yading@10: return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits); yading@10: } yading@10: yading@10: static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx, yading@10: AACEncContext *s, yading@10: SingleChannelElement *sce, yading@10: const float lambda) yading@10: { yading@10: int start = 0, i, w, w2, g; yading@10: int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels; yading@10: float dists[128] = { 0 }, uplims[128]; yading@10: float maxvals[128]; yading@10: int fflag, minscaler; yading@10: int its = 0; yading@10: int allz = 0; yading@10: float minthr = INFINITY; yading@10: yading@10: destbits = FFMIN(destbits, 5800); yading@10: for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { yading@10: for (g = 0; g < sce->ics.num_swb; g++) { yading@10: int nz = 0; yading@10: float uplim = 0.0f; yading@10: for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { yading@10: FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; yading@10: uplim += band->threshold; yading@10: if (band->energy <= band->threshold || band->threshold == 0.0f) { yading@10: sce->zeroes[(w+w2)*16+g] = 1; yading@10: continue; yading@10: } yading@10: nz = 1; yading@10: } yading@10: uplims[w*16+g] = uplim *512; yading@10: sce->zeroes[w*16+g] = !nz; yading@10: if (nz) yading@10: minthr = FFMIN(minthr, uplim); yading@10: allz |= nz; yading@10: } yading@10: } yading@10: for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { yading@10: for (g = 0; g < sce->ics.num_swb; g++) { yading@10: if (sce->zeroes[w*16+g]) { yading@10: sce->sf_idx[w*16+g] = SCALE_ONE_POS; yading@10: continue; yading@10: } yading@10: sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59); yading@10: } yading@10: } yading@10: yading@10: if (!allz) yading@10: return; yading@10: abs_pow34_v(s->scoefs, sce->coeffs, 1024); yading@10: yading@10: for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { yading@10: start = w*128; yading@10: for (g = 0; g < sce->ics.num_swb; g++) { yading@10: const float *scaled = s->scoefs + start; yading@10: maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled); yading@10: start += sce->ics.swb_sizes[g]; yading@10: } yading@10: } yading@10: yading@10: do { yading@10: int tbits, qstep; yading@10: minscaler = sce->sf_idx[0]; yading@10: qstep = its ? 1 : 32; yading@10: do { yading@10: int prev = -1; yading@10: tbits = 0; yading@10: fflag = 0; yading@10: yading@10: if (qstep > 1) { yading@10: for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { yading@10: start = w*128; yading@10: for (g = 0; g < sce->ics.num_swb; g++) { yading@10: const float *coefs = sce->coeffs + start; yading@10: const float *scaled = s->scoefs + start; yading@10: int bits = 0; yading@10: int cb; yading@10: yading@10: if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { yading@10: start += sce->ics.swb_sizes[g]; yading@10: continue; yading@10: } yading@10: minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); yading@10: cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); yading@10: for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { yading@10: int b; yading@10: bits += quantize_band_cost_bits(s, coefs + w2*128, yading@10: scaled + w2*128, yading@10: sce->ics.swb_sizes[g], yading@10: sce->sf_idx[w*16+g], yading@10: cb, yading@10: 1.0f, yading@10: INFINITY, yading@10: &b); yading@10: } yading@10: if (prev != -1) { yading@10: bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO]; yading@10: } yading@10: tbits += bits; yading@10: start += sce->ics.swb_sizes[g]; yading@10: prev = sce->sf_idx[w*16+g]; yading@10: } yading@10: } yading@10: } yading@10: else { yading@10: for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { yading@10: start = w*128; yading@10: for (g = 0; g < sce->ics.num_swb; g++) { yading@10: const float *coefs = sce->coeffs + start; yading@10: const float *scaled = s->scoefs + start; yading@10: int bits = 0; yading@10: int cb; yading@10: float dist = 0.0f; yading@10: yading@10: if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { yading@10: start += sce->ics.swb_sizes[g]; yading@10: continue; yading@10: } yading@10: minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); yading@10: cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); yading@10: for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { yading@10: int b; yading@10: dist += quantize_band_cost(s, coefs + w2*128, yading@10: scaled + w2*128, yading@10: sce->ics.swb_sizes[g], yading@10: sce->sf_idx[w*16+g], yading@10: cb, yading@10: 1.0f, yading@10: INFINITY, yading@10: &b); yading@10: bits += b; yading@10: } yading@10: dists[w*16+g] = dist - bits; yading@10: if (prev != -1) { yading@10: bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO]; yading@10: } yading@10: tbits += bits; yading@10: start += sce->ics.swb_sizes[g]; yading@10: prev = sce->sf_idx[w*16+g]; yading@10: } yading@10: } yading@10: } yading@10: if (tbits > destbits) { yading@10: for (i = 0; i < 128; i++) yading@10: if (sce->sf_idx[i] < 218 - qstep) yading@10: sce->sf_idx[i] += qstep; yading@10: } else { yading@10: for (i = 0; i < 128; i++) yading@10: if (sce->sf_idx[i] > 60 - qstep) yading@10: sce->sf_idx[i] -= qstep; yading@10: } yading@10: qstep >>= 1; yading@10: if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217) yading@10: qstep = 1; yading@10: } while (qstep); yading@10: yading@10: fflag = 0; yading@10: minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); yading@10: for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { yading@10: for (g = 0; g < sce->ics.num_swb; g++) { yading@10: int prevsc = sce->sf_idx[w*16+g]; yading@10: if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) { yading@10: if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1)) yading@10: sce->sf_idx[w*16+g]--; yading@10: else yading@10: sce->sf_idx[w*16+g]-=2; yading@10: } yading@10: sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF); yading@10: sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219); yading@10: if (sce->sf_idx[w*16+g] != prevsc) yading@10: fflag = 1; yading@10: sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); yading@10: } yading@10: } yading@10: its++; yading@10: } while (fflag && its < 10); yading@10: } yading@10: yading@10: static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe, yading@10: const float lambda) yading@10: { yading@10: int start = 0, i, w, w2, g; yading@10: float M[128], S[128]; yading@10: float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; yading@10: SingleChannelElement *sce0 = &cpe->ch[0]; yading@10: SingleChannelElement *sce1 = &cpe->ch[1]; yading@10: if (!cpe->common_window) yading@10: return; yading@10: for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { yading@10: for (g = 0; g < sce0->ics.num_swb; g++) { yading@10: if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { yading@10: float dist1 = 0.0f, dist2 = 0.0f; yading@10: for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { yading@10: FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; yading@10: FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; yading@10: float minthr = FFMIN(band0->threshold, band1->threshold); yading@10: float maxthr = FFMAX(band0->threshold, band1->threshold); yading@10: for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) { yading@10: M[i ] = (sce0->coeffs[start+w2*128+i ] yading@10: + sce1->coeffs[start+w2*128+i ]) * 0.5; yading@10: M[i+1] = (sce0->coeffs[start+w2*128+i+1] yading@10: + sce1->coeffs[start+w2*128+i+1]) * 0.5; yading@10: M[i+2] = (sce0->coeffs[start+w2*128+i+2] yading@10: + sce1->coeffs[start+w2*128+i+2]) * 0.5; yading@10: M[i+3] = (sce0->coeffs[start+w2*128+i+3] yading@10: + sce1->coeffs[start+w2*128+i+3]) * 0.5; yading@10: yading@10: S[i ] = M[i ] yading@10: - sce1->coeffs[start+w2*128+i ]; yading@10: S[i+1] = M[i+1] yading@10: - sce1->coeffs[start+w2*128+i+1]; yading@10: S[i+2] = M[i+2] yading@10: - sce1->coeffs[start+w2*128+i+2]; yading@10: S[i+3] = M[i+3] yading@10: - sce1->coeffs[start+w2*128+i+3]; yading@10: } yading@10: abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); yading@10: abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); yading@10: abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); yading@10: abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); yading@10: dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128, yading@10: L34, yading@10: sce0->ics.swb_sizes[g], yading@10: sce0->sf_idx[(w+w2)*16+g], yading@10: sce0->band_type[(w+w2)*16+g], yading@10: lambda / band0->threshold, INFINITY, NULL); yading@10: dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128, yading@10: R34, yading@10: sce1->ics.swb_sizes[g], yading@10: sce1->sf_idx[(w+w2)*16+g], yading@10: sce1->band_type[(w+w2)*16+g], yading@10: lambda / band1->threshold, INFINITY, NULL); yading@10: dist2 += quantize_band_cost(s, M, yading@10: M34, yading@10: sce0->ics.swb_sizes[g], yading@10: sce0->sf_idx[(w+w2)*16+g], yading@10: sce0->band_type[(w+w2)*16+g], yading@10: lambda / maxthr, INFINITY, NULL); yading@10: dist2 += quantize_band_cost(s, S, yading@10: S34, yading@10: sce1->ics.swb_sizes[g], yading@10: sce1->sf_idx[(w+w2)*16+g], yading@10: sce1->band_type[(w+w2)*16+g], yading@10: lambda / minthr, INFINITY, NULL); yading@10: } yading@10: cpe->ms_mask[w*16+g] = dist2 < dist1; yading@10: } yading@10: start += sce0->ics.swb_sizes[g]; yading@10: } yading@10: } yading@10: } yading@10: #endif /*HAVE_MIPSFPU */ yading@10: yading@10: static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce, yading@10: int win, int group_len, const float lambda) yading@10: { yading@10: BandCodingPath path[120][12]; yading@10: int w, swb, cb, start, size; yading@10: int i, j; yading@10: const int max_sfb = sce->ics.max_sfb; yading@10: const int run_bits = sce->ics.num_windows == 1 ? 5 : 3; yading@10: const int run_esc = (1 << run_bits) - 1; yading@10: int idx, ppos, count; yading@10: int stackrun[120], stackcb[120], stack_len; yading@10: float next_minbits = INFINITY; yading@10: int next_mincb = 0; yading@10: yading@10: abs_pow34_v(s->scoefs, sce->coeffs, 1024); yading@10: start = win*128; yading@10: for (cb = 0; cb < 12; cb++) { yading@10: path[0][cb].cost = run_bits+4; yading@10: path[0][cb].prev_idx = -1; yading@10: path[0][cb].run = 0; yading@10: } yading@10: for (swb = 0; swb < max_sfb; swb++) { yading@10: size = sce->ics.swb_sizes[swb]; yading@10: if (sce->zeroes[win*16 + swb]) { yading@10: float cost_stay_here = path[swb][0].cost; yading@10: float cost_get_here = next_minbits + run_bits + 4; yading@10: if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run] yading@10: != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1]) yading@10: cost_stay_here += run_bits; yading@10: if (cost_get_here < cost_stay_here) { yading@10: path[swb+1][0].prev_idx = next_mincb; yading@10: path[swb+1][0].cost = cost_get_here; yading@10: path[swb+1][0].run = 1; yading@10: } else { yading@10: path[swb+1][0].prev_idx = 0; yading@10: path[swb+1][0].cost = cost_stay_here; yading@10: path[swb+1][0].run = path[swb][0].run + 1; yading@10: } yading@10: next_minbits = path[swb+1][0].cost; yading@10: next_mincb = 0; yading@10: for (cb = 1; cb < 12; cb++) { yading@10: path[swb+1][cb].cost = 61450; yading@10: path[swb+1][cb].prev_idx = -1; yading@10: path[swb+1][cb].run = 0; yading@10: } yading@10: } else { yading@10: float minbits = next_minbits; yading@10: int mincb = next_mincb; yading@10: int startcb = sce->band_type[win*16+swb]; yading@10: next_minbits = INFINITY; yading@10: next_mincb = 0; yading@10: for (cb = 0; cb < startcb; cb++) { yading@10: path[swb+1][cb].cost = 61450; yading@10: path[swb+1][cb].prev_idx = -1; yading@10: path[swb+1][cb].run = 0; yading@10: } yading@10: for (cb = startcb; cb < 12; cb++) { yading@10: float cost_stay_here, cost_get_here; yading@10: float bits = 0.0f; yading@10: for (w = 0; w < group_len; w++) { yading@10: bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128, yading@10: s->scoefs + start + w*128, size, yading@10: sce->sf_idx[(win+w)*16+swb], cb, yading@10: 0, INFINITY, NULL); yading@10: } yading@10: cost_stay_here = path[swb][cb].cost + bits; yading@10: cost_get_here = minbits + bits + run_bits + 4; yading@10: if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run] yading@10: != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1]) yading@10: cost_stay_here += run_bits; yading@10: if (cost_get_here < cost_stay_here) { yading@10: path[swb+1][cb].prev_idx = mincb; yading@10: path[swb+1][cb].cost = cost_get_here; yading@10: path[swb+1][cb].run = 1; yading@10: } else { yading@10: path[swb+1][cb].prev_idx = cb; yading@10: path[swb+1][cb].cost = cost_stay_here; yading@10: path[swb+1][cb].run = path[swb][cb].run + 1; yading@10: } yading@10: if (path[swb+1][cb].cost < next_minbits) { yading@10: next_minbits = path[swb+1][cb].cost; yading@10: next_mincb = cb; yading@10: } yading@10: } yading@10: } yading@10: start += sce->ics.swb_sizes[swb]; yading@10: } yading@10: yading@10: stack_len = 0; yading@10: idx = 0; yading@10: for (cb = 1; cb < 12; cb++) yading@10: if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) yading@10: idx = cb; yading@10: ppos = max_sfb; yading@10: while (ppos > 0) { yading@10: av_assert1(idx >= 0); yading@10: cb = idx; yading@10: stackrun[stack_len] = path[ppos][cb].run; yading@10: stackcb [stack_len] = cb; yading@10: idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx; yading@10: ppos -= path[ppos][cb].run; yading@10: stack_len++; yading@10: } yading@10: yading@10: start = 0; yading@10: for (i = stack_len - 1; i >= 0; i--) { yading@10: put_bits(&s->pb, 4, stackcb[i]); yading@10: count = stackrun[i]; yading@10: memset(sce->zeroes + win*16 + start, !stackcb[i], count); yading@10: for (j = 0; j < count; j++) { yading@10: sce->band_type[win*16 + start] = stackcb[i]; yading@10: start++; yading@10: } yading@10: while (count >= run_esc) { yading@10: put_bits(&s->pb, run_bits, run_esc); yading@10: count -= run_esc; yading@10: } yading@10: put_bits(&s->pb, run_bits, count); yading@10: } yading@10: } yading@10: #endif /* HAVE_INLINE_ASM */ yading@10: yading@10: void ff_aac_coder_init_mips(AACEncContext *c) { yading@10: #if HAVE_INLINE_ASM yading@10: AACCoefficientsEncoder *e = c->coder; yading@10: int option = c->options.aac_coder; yading@10: yading@10: if (option == 2) { yading@10: e->quantize_and_encode_band = quantize_and_encode_band_mips; yading@10: e->encode_window_bands_info = codebook_trellis_rate_mips; yading@10: #if HAVE_MIPSFPU yading@10: e->search_for_quantizers = search_for_quantizers_twoloop_mips; yading@10: e->search_for_ms = search_for_ms_mips; yading@10: #endif /* HAVE_MIPSFPU */ yading@10: } yading@10: #endif /* HAVE_INLINE_ASM */ yading@10: }