annotate ffmpeg/libavcodec/mips/aacsbr_mips.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Copyright (c) 2012
yading@10 3 * MIPS Technologies, Inc., California.
yading@10 4 *
yading@10 5 * Redistribution and use in source and binary forms, with or without
yading@10 6 * modification, are permitted provided that the following conditions
yading@10 7 * are met:
yading@10 8 * 1. Redistributions of source code must retain the above copyright
yading@10 9 * notice, this list of conditions and the following disclaimer.
yading@10 10 * 2. Redistributions in binary form must reproduce the above copyright
yading@10 11 * notice, this list of conditions and the following disclaimer in the
yading@10 12 * documentation and/or other materials provided with the distribution.
yading@10 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
yading@10 14 * contributors may be used to endorse or promote products derived from
yading@10 15 * this software without specific prior written permission.
yading@10 16 *
yading@10 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
yading@10 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
yading@10 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
yading@10 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
yading@10 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
yading@10 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
yading@10 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
yading@10 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
yading@10 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
yading@10 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
yading@10 27 * SUCH DAMAGE.
yading@10 28 *
yading@10 29 * Authors: Djordje Pesut (djordje@mips.com)
yading@10 30 * Mirjana Vulin (mvulin@mips.com)
yading@10 31 *
yading@10 32 * This file is part of FFmpeg.
yading@10 33 *
yading@10 34 * FFmpeg is free software; you can redistribute it and/or
yading@10 35 * modify it under the terms of the GNU Lesser General Public
yading@10 36 * License as published by the Free Software Foundation; either
yading@10 37 * version 2.1 of the License, or (at your option) any later version.
yading@10 38 *
yading@10 39 * FFmpeg is distributed in the hope that it will be useful,
yading@10 40 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 42 * Lesser General Public License for more details.
yading@10 43 *
yading@10 44 * You should have received a copy of the GNU Lesser General Public
yading@10 45 * License along with FFmpeg; if not, write to the Free Software
yading@10 46 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 47 */
yading@10 48
yading@10 49 /**
yading@10 50 * @file
yading@10 51 * Reference: libavcodec/aacsbr.c
yading@10 52 */
yading@10 53
yading@10 54 #include "libavcodec/aac.h"
yading@10 55 #include "libavcodec/aacsbr.h"
yading@10 56
yading@10 57 #define ENVELOPE_ADJUSTMENT_OFFSET 2
yading@10 58
yading@10 59 #if HAVE_INLINE_ASM
yading@10 60 static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
yading@10 61 float X_low[32][40][2], const float W[2][32][32][2],
yading@10 62 int buf_idx)
yading@10 63 {
yading@10 64 int i, k;
yading@10 65 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
yading@10 66 float *p_x_low = &X_low[0][8][0];
yading@10 67 float *p_w = (float*)&W[buf_idx][0][0][0];
yading@10 68 float *p_x1_low = &X_low[0][0][0];
yading@10 69 float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
yading@10 70
yading@10 71 float *loop_end=p_x1_low + 2560;
yading@10 72
yading@10 73 /* loop unrolled 8 times */
yading@10 74 __asm__ volatile (
yading@10 75 "1: \n\t"
yading@10 76 "sw $0, 0(%[p_x1_low]) \n\t"
yading@10 77 "sw $0, 4(%[p_x1_low]) \n\t"
yading@10 78 "sw $0, 8(%[p_x1_low]) \n\t"
yading@10 79 "sw $0, 12(%[p_x1_low]) \n\t"
yading@10 80 "sw $0, 16(%[p_x1_low]) \n\t"
yading@10 81 "sw $0, 20(%[p_x1_low]) \n\t"
yading@10 82 "sw $0, 24(%[p_x1_low]) \n\t"
yading@10 83 "sw $0, 28(%[p_x1_low]) \n\t"
yading@10 84 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
yading@10 85 "bne %[p_x1_low], %[loop_end], 1b \n\t"
yading@10 86 "addiu %[p_x1_low], %[p_x1_low], -10240 \n\t"
yading@10 87
yading@10 88 : [p_x1_low]"+r"(p_x1_low)
yading@10 89 : [loop_end]"r"(loop_end)
yading@10 90 : "memory"
yading@10 91 );
yading@10 92
yading@10 93 for (k = 0; k < sbr->kx[1]; k++) {
yading@10 94 for (i = 0; i < 32; i+=4) {
yading@10 95 /* loop unrolled 4 times */
yading@10 96 __asm__ volatile (
yading@10 97 "lw %[temp0], 0(%[p_w]) \n\t"
yading@10 98 "lw %[temp1], 4(%[p_w]) \n\t"
yading@10 99 "lw %[temp2], 256(%[p_w]) \n\t"
yading@10 100 "lw %[temp3], 260(%[p_w]) \n\t"
yading@10 101 "lw %[temp4], 512(%[p_w]) \n\t"
yading@10 102 "lw %[temp5], 516(%[p_w]) \n\t"
yading@10 103 "lw %[temp6], 768(%[p_w]) \n\t"
yading@10 104 "lw %[temp7], 772(%[p_w]) \n\t"
yading@10 105 "sw %[temp0], 0(%[p_x_low]) \n\t"
yading@10 106 "sw %[temp1], 4(%[p_x_low]) \n\t"
yading@10 107 "sw %[temp2], 8(%[p_x_low]) \n\t"
yading@10 108 "sw %[temp3], 12(%[p_x_low]) \n\t"
yading@10 109 "sw %[temp4], 16(%[p_x_low]) \n\t"
yading@10 110 "sw %[temp5], 20(%[p_x_low]) \n\t"
yading@10 111 "sw %[temp6], 24(%[p_x_low]) \n\t"
yading@10 112 "sw %[temp7], 28(%[p_x_low]) \n\t"
yading@10 113 "addiu %[p_x_low], %[p_x_low], 32 \n\t"
yading@10 114 "addiu %[p_w], %[p_w], 1024 \n\t"
yading@10 115
yading@10 116 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
yading@10 117 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
yading@10 118 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
yading@10 119 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
yading@10 120 [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
yading@10 121 :
yading@10 122 : "memory"
yading@10 123 );
yading@10 124 }
yading@10 125 p_x_low += 16;
yading@10 126 p_w -= 2046;
yading@10 127 }
yading@10 128
yading@10 129 for (k = 0; k < sbr->kx[0]; k++) {
yading@10 130 for (i = 0; i < 2; i++) {
yading@10 131
yading@10 132 /* loop unrolled 4 times */
yading@10 133 __asm__ volatile (
yading@10 134 "lw %[temp0], 0(%[p_w1]) \n\t"
yading@10 135 "lw %[temp1], 4(%[p_w1]) \n\t"
yading@10 136 "lw %[temp2], 256(%[p_w1]) \n\t"
yading@10 137 "lw %[temp3], 260(%[p_w1]) \n\t"
yading@10 138 "lw %[temp4], 512(%[p_w1]) \n\t"
yading@10 139 "lw %[temp5], 516(%[p_w1]) \n\t"
yading@10 140 "lw %[temp6], 768(%[p_w1]) \n\t"
yading@10 141 "lw %[temp7], 772(%[p_w1]) \n\t"
yading@10 142 "sw %[temp0], 0(%[p_x1_low]) \n\t"
yading@10 143 "sw %[temp1], 4(%[p_x1_low]) \n\t"
yading@10 144 "sw %[temp2], 8(%[p_x1_low]) \n\t"
yading@10 145 "sw %[temp3], 12(%[p_x1_low]) \n\t"
yading@10 146 "sw %[temp4], 16(%[p_x1_low]) \n\t"
yading@10 147 "sw %[temp5], 20(%[p_x1_low]) \n\t"
yading@10 148 "sw %[temp6], 24(%[p_x1_low]) \n\t"
yading@10 149 "sw %[temp7], 28(%[p_x1_low]) \n\t"
yading@10 150 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
yading@10 151 "addiu %[p_w1], %[p_w1], 1024 \n\t"
yading@10 152
yading@10 153 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
yading@10 154 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
yading@10 155 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
yading@10 156 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
yading@10 157 [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
yading@10 158 :
yading@10 159 : "memory"
yading@10 160 );
yading@10 161 }
yading@10 162 p_x1_low += 64;
yading@10 163 p_w1 -= 510;
yading@10 164 }
yading@10 165 return 0;
yading@10 166 }
yading@10 167
yading@10 168 static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
yading@10 169 const float Y0[38][64][2], const float Y1[38][64][2],
yading@10 170 const float X_low[32][40][2], int ch)
yading@10 171 {
yading@10 172 int k, i;
yading@10 173 const int i_f = 32;
yading@10 174 int temp0, temp1, temp2, temp3;
yading@10 175 const float *X_low1, *Y01, *Y11;
yading@10 176 float *x1=&X[0][0][0];
yading@10 177 float *j=x1+4864;
yading@10 178 const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
yading@10 179
yading@10 180 /* loop unrolled 8 times */
yading@10 181 __asm__ volatile (
yading@10 182 "1: \n\t"
yading@10 183 "sw $0, 0(%[x1]) \n\t"
yading@10 184 "sw $0, 4(%[x1]) \n\t"
yading@10 185 "sw $0, 8(%[x1]) \n\t"
yading@10 186 "sw $0, 12(%[x1]) \n\t"
yading@10 187 "sw $0, 16(%[x1]) \n\t"
yading@10 188 "sw $0, 20(%[x1]) \n\t"
yading@10 189 "sw $0, 24(%[x1]) \n\t"
yading@10 190 "sw $0, 28(%[x1]) \n\t"
yading@10 191 "addiu %[x1], %[x1], 32 \n\t"
yading@10 192 "bne %[x1], %[j], 1b \n\t"
yading@10 193 "addiu %[x1], %[x1], -19456 \n\t"
yading@10 194
yading@10 195 : [x1]"+r"(x1)
yading@10 196 : [j]"r"(j)
yading@10 197 : "memory"
yading@10 198 );
yading@10 199
yading@10 200 if (i_Temp != 0) {
yading@10 201
yading@10 202 X_low1=&X_low[0][2][0];
yading@10 203
yading@10 204 for (k = 0; k < sbr->kx[0]; k++) {
yading@10 205
yading@10 206 __asm__ volatile (
yading@10 207 "move %[i], $zero \n\t"
yading@10 208 "2: \n\t"
yading@10 209 "lw %[temp0], 0(%[X_low1]) \n\t"
yading@10 210 "lw %[temp1], 4(%[X_low1]) \n\t"
yading@10 211 "sw %[temp0], 0(%[x1]) \n\t"
yading@10 212 "sw %[temp1], 9728(%[x1]) \n\t"
yading@10 213 "addiu %[x1], %[x1], 256 \n\t"
yading@10 214 "addiu %[X_low1], %[X_low1], 8 \n\t"
yading@10 215 "addiu %[i], %[i], 1 \n\t"
yading@10 216 "bne %[i], %[i_Temp], 2b \n\t"
yading@10 217
yading@10 218 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
yading@10 219 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
yading@10 220 : [i_Temp]"r"(i_Temp)
yading@10 221 : "memory"
yading@10 222 );
yading@10 223 x1-=(i_Temp<<6)-1;
yading@10 224 X_low1-=(i_Temp<<1)-80;
yading@10 225 }
yading@10 226
yading@10 227 x1=&X[0][0][k];
yading@10 228 Y01=(float*)&Y0[32][k][0];
yading@10 229
yading@10 230 for (; k < sbr->kx[0] + sbr->m[0]; k++) {
yading@10 231 __asm__ volatile (
yading@10 232 "move %[i], $zero \n\t"
yading@10 233 "3: \n\t"
yading@10 234 "lw %[temp0], 0(%[Y01]) \n\t"
yading@10 235 "lw %[temp1], 4(%[Y01]) \n\t"
yading@10 236 "sw %[temp0], 0(%[x1]) \n\t"
yading@10 237 "sw %[temp1], 9728(%[x1]) \n\t"
yading@10 238 "addiu %[x1], %[x1], 256 \n\t"
yading@10 239 "addiu %[Y01], %[Y01], 512 \n\t"
yading@10 240 "addiu %[i], %[i], 1 \n\t"
yading@10 241 "bne %[i], %[i_Temp], 3b \n\t"
yading@10 242
yading@10 243 : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
yading@10 244 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
yading@10 245 : [i_Temp]"r"(i_Temp)
yading@10 246 : "memory"
yading@10 247 );
yading@10 248 x1 -=(i_Temp<<6)-1;
yading@10 249 Y01 -=(i_Temp<<7)-2;
yading@10 250 }
yading@10 251 }
yading@10 252
yading@10 253 x1=&X[0][i_Temp][0];
yading@10 254 X_low1=&X_low[0][i_Temp+2][0];
yading@10 255 temp3=38;
yading@10 256
yading@10 257 for (k = 0; k < sbr->kx[1]; k++) {
yading@10 258
yading@10 259 __asm__ volatile (
yading@10 260 "move %[i], %[i_Temp] \n\t"
yading@10 261 "4: \n\t"
yading@10 262 "lw %[temp0], 0(%[X_low1]) \n\t"
yading@10 263 "lw %[temp1], 4(%[X_low1]) \n\t"
yading@10 264 "sw %[temp0], 0(%[x1]) \n\t"
yading@10 265 "sw %[temp1], 9728(%[x1]) \n\t"
yading@10 266 "addiu %[x1], %[x1], 256 \n\t"
yading@10 267 "addiu %[X_low1], %[X_low1], 8 \n\t"
yading@10 268 "addiu %[i], %[i], 1 \n\t"
yading@10 269 "bne %[i], %[temp3], 4b \n\t"
yading@10 270
yading@10 271 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
yading@10 272 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
yading@10 273 [temp2]"=&r"(temp2)
yading@10 274 : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
yading@10 275 : "memory"
yading@10 276 );
yading@10 277 x1 -= ((38-i_Temp)<<6)-1;
yading@10 278 X_low1 -= ((38-i_Temp)<<1)- 80;
yading@10 279 }
yading@10 280
yading@10 281 x1=&X[0][i_Temp][k];
yading@10 282 Y11=&Y1[i_Temp][k][0];
yading@10 283 temp2=32;
yading@10 284
yading@10 285 for (; k < sbr->kx[1] + sbr->m[1]; k++) {
yading@10 286
yading@10 287 __asm__ volatile (
yading@10 288 "move %[i], %[i_Temp] \n\t"
yading@10 289 "5: \n\t"
yading@10 290 "lw %[temp0], 0(%[Y11]) \n\t"
yading@10 291 "lw %[temp1], 4(%[Y11]) \n\t"
yading@10 292 "sw %[temp0], 0(%[x1]) \n\t"
yading@10 293 "sw %[temp1], 9728(%[x1]) \n\t"
yading@10 294 "addiu %[x1], %[x1], 256 \n\t"
yading@10 295 "addiu %[Y11], %[Y11], 512 \n\t"
yading@10 296 "addiu %[i], %[i], 1 \n\t"
yading@10 297 "bne %[i], %[temp2], 5b \n\t"
yading@10 298
yading@10 299 : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
yading@10 300 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
yading@10 301 : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
yading@10 302 [temp2]"r"(temp2)
yading@10 303 : "memory"
yading@10 304 );
yading@10 305
yading@10 306 x1 -= ((32-i_Temp)<<6)-1;
yading@10 307 Y11 -= ((32-i_Temp)<<7)-2;
yading@10 308 }
yading@10 309 return 0;
yading@10 310 }
yading@10 311
yading@10 312 #if HAVE_MIPSFPU
yading@10 313 static void sbr_hf_assemble_mips(float Y1[38][64][2],
yading@10 314 const float X_high[64][40][2],
yading@10 315 SpectralBandReplication *sbr, SBRData *ch_data,
yading@10 316 const int e_a[2])
yading@10 317 {
yading@10 318 int e, i, j, m;
yading@10 319 const int h_SL = 4 * !sbr->bs_smoothing_mode;
yading@10 320 const int kx = sbr->kx[1];
yading@10 321 const int m_max = sbr->m[1];
yading@10 322 static const float h_smooth[5] = {
yading@10 323 0.33333333333333,
yading@10 324 0.30150283239582,
yading@10 325 0.21816949906249,
yading@10 326 0.11516383427084,
yading@10 327 0.03183050093751,
yading@10 328 };
yading@10 329
yading@10 330 float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
yading@10 331 int indexnoise = ch_data->f_indexnoise;
yading@10 332 int indexsine = ch_data->f_indexsine;
yading@10 333 float *g_temp1, *q_temp1, *pok, *pok1;
yading@10 334 float temp1, temp2, temp3, temp4;
yading@10 335 int size = m_max;
yading@10 336
yading@10 337 if (sbr->reset) {
yading@10 338 for (i = 0; i < h_SL; i++) {
yading@10 339 memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
yading@10 340 memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
yading@10 341 }
yading@10 342 } else if (h_SL) {
yading@10 343 memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
yading@10 344 memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
yading@10 345 }
yading@10 346
yading@10 347 for (e = 0; e < ch_data->bs_num_env; e++) {
yading@10 348 for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
yading@10 349 g_temp1 = g_temp[h_SL + i];
yading@10 350 pok = sbr->gain[e];
yading@10 351 q_temp1 = q_temp[h_SL + i];
yading@10 352 pok1 = sbr->q_m[e];
yading@10 353
yading@10 354 /* loop unrolled 4 times */
yading@10 355 for (j=0; j<(size>>2); j++) {
yading@10 356 __asm__ volatile (
yading@10 357 "lw %[temp1], 0(%[pok]) \n\t"
yading@10 358 "lw %[temp2], 4(%[pok]) \n\t"
yading@10 359 "lw %[temp3], 8(%[pok]) \n\t"
yading@10 360 "lw %[temp4], 12(%[pok]) \n\t"
yading@10 361 "sw %[temp1], 0(%[g_temp1]) \n\t"
yading@10 362 "sw %[temp2], 4(%[g_temp1]) \n\t"
yading@10 363 "sw %[temp3], 8(%[g_temp1]) \n\t"
yading@10 364 "sw %[temp4], 12(%[g_temp1]) \n\t"
yading@10 365 "lw %[temp1], 0(%[pok1]) \n\t"
yading@10 366 "lw %[temp2], 4(%[pok1]) \n\t"
yading@10 367 "lw %[temp3], 8(%[pok1]) \n\t"
yading@10 368 "lw %[temp4], 12(%[pok1]) \n\t"
yading@10 369 "sw %[temp1], 0(%[q_temp1]) \n\t"
yading@10 370 "sw %[temp2], 4(%[q_temp1]) \n\t"
yading@10 371 "sw %[temp3], 8(%[q_temp1]) \n\t"
yading@10 372 "sw %[temp4], 12(%[q_temp1]) \n\t"
yading@10 373 "addiu %[pok], %[pok], 16 \n\t"
yading@10 374 "addiu %[g_temp1], %[g_temp1], 16 \n\t"
yading@10 375 "addiu %[pok1], %[pok1], 16 \n\t"
yading@10 376 "addiu %[q_temp1], %[q_temp1], 16 \n\t"
yading@10 377
yading@10 378 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
yading@10 379 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
yading@10 380 [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
yading@10 381 [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
yading@10 382 :
yading@10 383 : "memory"
yading@10 384 );
yading@10 385 }
yading@10 386
yading@10 387 for (j=0; j<(size&3); j++) {
yading@10 388 __asm__ volatile (
yading@10 389 "lw %[temp1], 0(%[pok]) \n\t"
yading@10 390 "lw %[temp2], 0(%[pok1]) \n\t"
yading@10 391 "sw %[temp1], 0(%[g_temp1]) \n\t"
yading@10 392 "sw %[temp2], 0(%[q_temp1]) \n\t"
yading@10 393 "addiu %[pok], %[pok], 4 \n\t"
yading@10 394 "addiu %[g_temp1], %[g_temp1], 4 \n\t"
yading@10 395 "addiu %[pok1], %[pok1], 4 \n\t"
yading@10 396 "addiu %[q_temp1], %[q_temp1], 4 \n\t"
yading@10 397
yading@10 398 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
yading@10 399 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
yading@10 400 [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
yading@10 401 [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
yading@10 402 :
yading@10 403 : "memory"
yading@10 404 );
yading@10 405 }
yading@10 406 }
yading@10 407 }
yading@10 408
yading@10 409 for (e = 0; e < ch_data->bs_num_env; e++) {
yading@10 410 for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
yading@10 411 LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
yading@10 412 LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
yading@10 413 float *g_filt, *q_filt;
yading@10 414
yading@10 415 if (h_SL && e != e_a[0] && e != e_a[1]) {
yading@10 416 g_filt = g_filt_tab;
yading@10 417 q_filt = q_filt_tab;
yading@10 418
yading@10 419 for (m = 0; m < m_max; m++) {
yading@10 420 const int idx1 = i + h_SL;
yading@10 421 g_filt[m] = 0.0f;
yading@10 422 q_filt[m] = 0.0f;
yading@10 423
yading@10 424 for (j = 0; j <= h_SL; j++) {
yading@10 425 g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
yading@10 426 q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
yading@10 427 }
yading@10 428 }
yading@10 429 } else {
yading@10 430 g_filt = g_temp[i + h_SL];
yading@10 431 q_filt = q_temp[i];
yading@10 432 }
yading@10 433
yading@10 434 sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
yading@10 435 i + ENVELOPE_ADJUSTMENT_OFFSET);
yading@10 436
yading@10 437 if (e != e_a[0] && e != e_a[1]) {
yading@10 438 sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
yading@10 439 q_filt, indexnoise,
yading@10 440 kx, m_max);
yading@10 441 } else {
yading@10 442 int idx = indexsine&1;
yading@10 443 int A = (1-((indexsine+(kx & 1))&2));
yading@10 444 int B = (A^(-idx)) + idx;
yading@10 445 float *out = &Y1[i][kx][idx];
yading@10 446 float *in = sbr->s_m[e];
yading@10 447 float temp0, temp1, temp2, temp3, temp4, temp5;
yading@10 448 float A_f = (float)A;
yading@10 449 float B_f = (float)B;
yading@10 450
yading@10 451 for (m = 0; m+1 < m_max; m+=2) {
yading@10 452
yading@10 453 temp2 = out[0];
yading@10 454 temp3 = out[2];
yading@10 455
yading@10 456 __asm__ volatile(
yading@10 457 "lwc1 %[temp0], 0(%[in]) \n\t"
yading@10 458 "lwc1 %[temp1], 4(%[in]) \n\t"
yading@10 459 "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
yading@10 460 "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
yading@10 461 "swc1 %[temp4], 0(%[out]) \n\t"
yading@10 462 "swc1 %[temp5], 8(%[out]) \n\t"
yading@10 463 "addiu %[in], %[in], 8 \n\t"
yading@10 464 "addiu %[out], %[out], 16 \n\t"
yading@10 465
yading@10 466 : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
yading@10 467 [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
yading@10 468 [in]"+r"(in), [out]"+r"(out)
yading@10 469 : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
yading@10 470 [temp3]"f"(temp3)
yading@10 471 : "memory"
yading@10 472 );
yading@10 473 }
yading@10 474 if(m_max&1)
yading@10 475 out[2*m ] += in[m ] * A;
yading@10 476 }
yading@10 477 indexnoise = (indexnoise + m_max) & 0x1ff;
yading@10 478 indexsine = (indexsine + 1) & 3;
yading@10 479 }
yading@10 480 }
yading@10 481 ch_data->f_indexnoise = indexnoise;
yading@10 482 ch_data->f_indexsine = indexsine;
yading@10 483 }
yading@10 484
yading@10 485 static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
yading@10 486 float (*alpha0)[2], float (*alpha1)[2],
yading@10 487 const float X_low[32][40][2], int k0)
yading@10 488 {
yading@10 489 int k;
yading@10 490 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
yading@10 491 float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
yading@10 492
yading@10 493 c = 1.000001f;
yading@10 494
yading@10 495 for (k = 0; k < k0; k++) {
yading@10 496 LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
yading@10 497 float dk;
yading@10 498 phi1 = &phi[0][0][0];
yading@10 499 alpha_1 = &alpha1[k][0];
yading@10 500 alpha_0 = &alpha0[k][0];
yading@10 501 dsp->autocorrelate(X_low[k], phi);
yading@10 502
yading@10 503 __asm__ volatile (
yading@10 504 "lwc1 %[temp0], 40(%[phi1]) \n\t"
yading@10 505 "lwc1 %[temp1], 16(%[phi1]) \n\t"
yading@10 506 "lwc1 %[temp2], 24(%[phi1]) \n\t"
yading@10 507 "lwc1 %[temp3], 28(%[phi1]) \n\t"
yading@10 508 "mul.s %[dk], %[temp0], %[temp1] \n\t"
yading@10 509 "lwc1 %[temp4], 0(%[phi1]) \n\t"
yading@10 510 "mul.s %[res2], %[temp2], %[temp2] \n\t"
yading@10 511 "lwc1 %[temp5], 4(%[phi1]) \n\t"
yading@10 512 "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
yading@10 513 "lwc1 %[temp6], 8(%[phi1]) \n\t"
yading@10 514 "div.s %[res2], %[res2], %[c] \n\t"
yading@10 515 "lwc1 %[temp0], 12(%[phi1]) \n\t"
yading@10 516 "sub.s %[dk], %[dk], %[res2] \n\t"
yading@10 517
yading@10 518 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
yading@10 519 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
yading@10 520 [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
yading@10 521 : [phi1]"r"(phi1), [c]"f"(c)
yading@10 522 : "memory"
yading@10 523 );
yading@10 524
yading@10 525 if (!dk) {
yading@10 526 alpha_1[0] = 0;
yading@10 527 alpha_1[1] = 0;
yading@10 528 } else {
yading@10 529 __asm__ volatile (
yading@10 530 "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
yading@10 531 "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
yading@10 532 "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
yading@10 533 "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
yading@10 534 "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
yading@10 535 "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
yading@10 536 "div.s %[temp_real], %[temp_real], %[dk] \n\t"
yading@10 537 "div.s %[temp_im], %[temp_im], %[dk] \n\t"
yading@10 538 "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
yading@10 539 "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
yading@10 540
yading@10 541 : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
yading@10 542 : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
yading@10 543 [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
yading@10 544 [temp5]"f"(temp5), [temp6]"f"(temp6),
yading@10 545 [alpha_1]"r"(alpha_1), [dk]"f"(dk)
yading@10 546 : "memory"
yading@10 547 );
yading@10 548 }
yading@10 549
yading@10 550 if (!phi1[4]) {
yading@10 551 alpha_0[0] = 0;
yading@10 552 alpha_0[1] = 0;
yading@10 553 } else {
yading@10 554 __asm__ volatile (
yading@10 555 "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
yading@10 556 "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
yading@10 557 "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
yading@10 558 "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
yading@10 559 "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
yading@10 560 "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
yading@10 561 "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
yading@10 562 "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
yading@10 563 "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
yading@10 564 "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
yading@10 565 "neg.s %[temp_real], %[temp_real] \n\t"
yading@10 566 "neg.s %[temp_im], %[temp_im] \n\t"
yading@10 567 "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
yading@10 568 "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
yading@10 569
yading@10 570 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
yading@10 571 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
yading@10 572 [res1]"=&f"(res1), [res2]"=&f"(res2)
yading@10 573 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
yading@10 574 [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
yading@10 575 [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
yading@10 576 : "memory"
yading@10 577 );
yading@10 578 }
yading@10 579
yading@10 580 __asm__ volatile (
yading@10 581 "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
yading@10 582 "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
yading@10 583 "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
yading@10 584 "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
yading@10 585 "mul.s %[res1], %[temp1], %[temp1] \n\t"
yading@10 586 "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
yading@10 587 "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
yading@10 588 "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
yading@10 589
yading@10 590 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
yading@10 591 [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
yading@10 592 [res1]"=&f"(res1), [res2]"=&f"(res2)
yading@10 593 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
yading@10 594 : "memory"
yading@10 595 );
yading@10 596
yading@10 597 if (res1 >= 16.0f || res2 >= 16.0f) {
yading@10 598 alpha_1[0] = 0;
yading@10 599 alpha_1[1] = 0;
yading@10 600 alpha_0[0] = 0;
yading@10 601 alpha_0[1] = 0;
yading@10 602 }
yading@10 603 }
yading@10 604 }
yading@10 605 #endif /* HAVE_MIPSFPU */
yading@10 606 #endif /* HAVE_INLINE_ASM */
yading@10 607
yading@10 608 void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
yading@10 609 {
yading@10 610 #if HAVE_INLINE_ASM
yading@10 611 c->sbr_lf_gen = sbr_lf_gen_mips;
yading@10 612 c->sbr_x_gen = sbr_x_gen_mips;
yading@10 613 #if HAVE_MIPSFPU
yading@10 614 c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
yading@10 615 c->sbr_hf_assemble = sbr_hf_assemble_mips;
yading@10 616 #endif /* HAVE_MIPSFPU */
yading@10 617 #endif /* HAVE_INLINE_ASM */
yading@10 618 }