celp_filters_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Nedeljko Babic (nbabic@mips.com)
30  *
31  * various filters for CELP-based codecs optimized for MIPS
32  *
33  * This file is part of FFmpeg.
34  *
35  * FFmpeg is free software; you can redistribute it and/or
36  * modify it under the terms of the GNU Lesser General Public
37  * License as published by the Free Software Foundation; either
38  * version 2.1 of the License, or (at your option) any later version.
39  *
40  * FFmpeg is distributed in the hope that it will be useful,
41  * but WITHOUT ANY WARRANTY; without even the implied warranty of
42  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43  * Lesser General Public License for more details.
44  *
45  * You should have received a copy of the GNU Lesser General Public
46  * License along with FFmpeg; if not, write to the Free Software
47  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48  */
49 
50 /**
51  * @file
52  * Reference: libavcodec/celp_filters.c
53  */
54 #include "config.h"
55 #include "libavutil/attributes.h"
56 #include "libavutil/common.h"
58 
59 #if HAVE_INLINE_ASM
/**
 * LP synthesis filter on floats, written with MIPS FPU inline assembly.
 *
 * The scalar tail loop at the end of this function evaluates, for each n,
 *     out[n] = in[n] - sum(filter_coeffs[i-1] * out[n-i]), i = 1..filter_length
 * The main loop produces four output samples per iteration and is meant to
 * yield the same recurrence (see the reference named in the file header).
 *
 * @param out           output buffer; samples at negative indices
 *                      (out[-1], out[-2], ...) are read as filter history
 * @param filter_coeffs filter coefficients, indexed from 0
 * @param in            input samples
 * @param buffer_length number of samples to produce
 * @param filter_length number of coefficients applied per output sample
 *
 * NOTE(review): the unrolled path reads filter_coeffs[0..3] unconditionally
 * and then consumes further coefficients two at a time, so it looks like it
 * expects filter_length to be even and at least 4 -- confirm against callers.
 *
 * In the asm below, "nmsub.s d, r, s, t" is used as d = r - s * t.
 */
60 static void ff_celp_lp_synthesis_filterf_mips(float *out,
61  const float *filter_coeffs,
62  const float* in, int buffer_length,
63  int filter_length)
64 {
65  int i,n;
66 
67  float out0, out1, out2, out3;
68  float old_out0, old_out1, old_out2, old_out3;
69  float a,b,c;
70  const float *p_filter_coeffs;
71  float *p_out;
72 
/* a, b and c are derived from the first three coefficients only. They are
 * used after the inner loop to apply the influence that the four samples of
 * the current group have on each other. */
73  a = filter_coeffs[0];
74  b = filter_coeffs[1];
75  c = filter_coeffs[2];
76  b -= filter_coeffs[0] * filter_coeffs[0];
77  c -= filter_coeffs[1] * filter_coeffs[0];
78  c -= filter_coeffs[0] * b;
79 
/* Prime the four-sample history from the samples stored just before out. */
80  old_out0 = out[-4];
81  old_out1 = out[-3];
82  old_out2 = out[-2];
83  old_out3 = out[-1];
/* Main loop: four output samples per iteration; out and in advance by 4. */
84  for (n = 0; n <= buffer_length - 4; n+=4) {
85  p_filter_coeffs = filter_coeffs;
86  p_out = out;
87 
88  out0 = in[0];
89  out1 = in[1];
90  out2 = in[2];
91  out3 = in[3];
92 
/* Subtract the products of coefficients 0..3 ($f0..$f3) with the four
 * previous outputs (old_out0..old_out3) from the new samples. */
93  __asm__ volatile(
94  "lwc1 $f2, 8(%[filter_coeffs]) \n\t"
95  "lwc1 $f1, 4(%[filter_coeffs]) \n\t"
96  "lwc1 $f0, 0(%[filter_coeffs]) \n\t"
97  "nmsub.s %[out0], %[out0], $f2, %[old_out1] \n\t"
98  "nmsub.s %[out1], %[out1], $f2, %[old_out2] \n\t"
99  "nmsub.s %[out2], %[out2], $f2, %[old_out3] \n\t"
100  "lwc1 $f3, 12(%[filter_coeffs]) \n\t"
101  "nmsub.s %[out0], %[out0], $f1, %[old_out2] \n\t"
102  "nmsub.s %[out1], %[out1], $f1, %[old_out3] \n\t"
103  "nmsub.s %[out2], %[out2], $f3, %[old_out2] \n\t"
104  "nmsub.s %[out0], %[out0], $f0, %[old_out3] \n\t"
105  "nmsub.s %[out3], %[out3], $f3, %[old_out3] \n\t"
106  "nmsub.s %[out1], %[out1], $f3, %[old_out1] \n\t"
107  "nmsub.s %[out0], %[out0], $f3, %[old_out0] \n\t"
108 
109  : [out0]"+f"(out0), [out1]"+f"(out1),
110  [out2]"+f"(out2), [out3]"+f"(out3)
111  : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
112  [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
113  [filter_coeffs]"r"(filter_coeffs)
114  : "$f0", "$f1", "$f2", "$f3", "$f4", "memory"
115  );
116 
/* Remaining coefficients, two per pass ($f5, then $f4). Each pass loads two
 * older output samples through p_out, which steps back by 8 bytes, while
 * p_filter_coeffs steps forward by 8 bytes. The old_out* variables are
 * overwritten here and act as a sliding window over the output history. */
117  for (i = 5; i <= filter_length; i += 2) {
118  __asm__ volatile(
119  "lwc1 %[old_out3], -20(%[p_out]) \n\t"
120  "lwc1 $f5, 16(%[p_filter_coeffs]) \n\t"
121  "addiu %[p_out], -8 \n\t"
122  "addiu %[p_filter_coeffs], 8 \n\t"
123  "nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t"
124  "nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t"
125  "lwc1 $f4, 12(%[p_filter_coeffs]) \n\t"
126  "lwc1 %[old_out2], -16(%[p_out]) \n\t"
127  "nmsub.s %[out0], %[out0], $f5, %[old_out3] \n\t"
128  "nmsub.s %[out2], %[out2], $f5, %[old_out1] \n\t"
129  "nmsub.s %[out1], %[out1], $f4, %[old_out3] \n\t"
130  "nmsub.s %[out3], %[out3], $f4, %[old_out1] \n\t"
131  "mov.s %[old_out1], %[old_out3] \n\t"
132  "nmsub.s %[out0], %[out0], $f4, %[old_out2] \n\t"
133  "nmsub.s %[out2], %[out2], $f4, %[old_out0] \n\t"
134 
135  : [out0]"+f"(out0), [out1]"+f"(out1),
136  [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
137  [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
138  [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
139  [p_out]"+r"(p_out)
140  :
141  : "$f4", "$f5", "memory"
142  );
/* Exchange the two window slots so the next pass sees the history in the
 * order the asm above expects. */
143  FFSWAP(float, old_out0, old_out2);
144  }
145 
/* Apply the dependencies inside the current group: out1..out3 are corrected
 * with out0..out2 through the precomputed a, b and c. The instruction order
 * matters because several operands are partially updated samples. */
146  __asm__ volatile(
147  "nmsub.s %[out3], %[out3], %[a], %[out2] \n\t"
148  "nmsub.s %[out2], %[out2], %[a], %[out1] \n\t"
149  "nmsub.s %[out3], %[out3], %[b], %[out1] \n\t"
150  "nmsub.s %[out1], %[out1], %[a], %[out0] \n\t"
151  "nmsub.s %[out2], %[out2], %[b], %[out0] \n\t"
152  "nmsub.s %[out3], %[out3], %[c], %[out0] \n\t"
153 
154  : [out0]"+f"(out0), [out1]"+f"(out1),
155  [out2]"+f"(out2), [out3]"+f"(out3)
156  : [a]"f"(a), [b]"f"(b), [c]"f"(c)
157  );
158 
159  out[0] = out0;
160  out[1] = out1;
161  out[2] = out2;
162  out[3] = out3;
163 
/* The samples just written become the history for the next group. */
164  old_out0 = out0;
165  old_out1 = out1;
166  old_out2 = out2;
167  old_out3 = out3;
168 
169  out += 4;
170  in += 4;
171  }
172 
/* Undo the pointer advances of the main loop so the tail can index by n. */
173  out -= n;
174  in -= n;
/* Tail: the samples left over after the groups of four, one at a time, with
 * one coefficient and one earlier output sample per inner iteration. */
175  for (; n < buffer_length; n++) {
176  float out_val, out_val_i, fc_val;
177  p_filter_coeffs = filter_coeffs;
178  p_out = &out[n];
179  out_val = in[n];
180  for (i = 1; i <= filter_length; i++) {
181  __asm__ volatile(
182  "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
183  "lwc1 %[out_val_i], -4(%[p_out]) \n\t"
184  "addiu %[p_filter_coeffs], 4 \n\t"
185  "addiu %[p_out], -4 \n\t"
186  "nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t"
187 
188  : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
189  [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
190  [p_filter_coeffs]"+r"(p_filter_coeffs)
191  :
192  : "memory"
193  );
194  }
195  out[n] = out_val;
196  }
197 }
198 
/**
 * LP zero synthesis filter on floats, written with MIPS FPU inline assembly.
 *
 * As implemented by the asm loop below, each output sample is
 *     out[n] = in[n] + sum(filter_coeffs[i-1] * in[n-i]), i = 1..filter_length
 *
 * @param out           output buffer
 * @param filter_coeffs filter coefficients, indexed from 0
 * @param in            input samples; samples at negative indices, down to
 *                      in[-filter_length], are read as history
 * @param buffer_length number of samples to produce
 * @param filter_length number of coefficients applied per output sample
 *
 * NOTE(review): the outer loop always handles eight samples and has no tail,
 * so in[n..n+7] is read and out[n..n+7] is written even if fewer than eight
 * samples remain -- presumably buffer_length is a multiple of 8; confirm.
 * NOTE(review): the asm loop runs at least once and consumes two
 * coefficients per pass, so filter_length is presumably positive and even;
 * confirm against callers.
 *
 * In the asm below, "madd.s d, r, s, t" is used as d = r + s * t.
 */
199 static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
200  const float *filter_coeffs,
201  const float *in, int buffer_length,
202  int filter_length)
203 {
204  int i,n;
205  float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
206  float sum_out3, sum_out2, sum_out1;
207  const float *p_filter_coeffs, *p_in;
208 
/* One group of eight output samples per iteration. */
209  for (n = 0; n < buffer_length; n+=8) {
210  p_in = &in[n];
211  p_filter_coeffs = filter_coeffs;
/* Each accumulator starts from its own input sample; sum_outK belongs to
 * sample n + K - 1. */
212  sum_out8 = in[n+7];
213  sum_out7 = in[n+6];
214  sum_out6 = in[n+5];
215  sum_out5 = in[n+4];
216  sum_out4 = in[n+3];
217  sum_out3 = in[n+2];
218  sum_out2 = in[n+1];
219  sum_out1 = in[n];
220  i = filter_length;
221 
/* Each pass of the asm loop applies two coefficients to all eight
 * accumulators. The first half loads inputs p_in[-1..6] into $f0..$f7; the
 * second half reuses $f0..$f6 shifted by one sample and reloads only $f7
 * with p_in[-2]. Then p_in steps back by 8 bytes, p_filter_coeffs steps
 * forward by 8 bytes, and i drops by 2 until it is no longer positive.
 * The "%=" suffix keeps the local label distinct per asm instance. */
222  /* i is always greater than 0
223  * outer loop is unrolled eight times so there is less memory access
224  * inner loop is unrolled two times
225  */
226  __asm__ volatile(
227  "filt_lp_inner%=: \n\t"
228  "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
229  "lwc1 $f7, 6*4(%[p_in]) \n\t"
230  "lwc1 $f6, 5*4(%[p_in]) \n\t"
231  "lwc1 $f5, 4*4(%[p_in]) \n\t"
232  "lwc1 $f4, 3*4(%[p_in]) \n\t"
233  "lwc1 $f3, 2*4(%[p_in]) \n\t"
234  "lwc1 $f2, 4(%[p_in]) \n\t"
235  "lwc1 $f1, 0(%[p_in]) \n\t"
236  "lwc1 $f0, -4(%[p_in]) \n\t"
237  "addiu %[i], -2 \n\t"
238  "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f7 \n\t"
239  "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f6 \n\t"
240  "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f5 \n\t"
241  "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f4 \n\t"
242  "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f3 \n\t"
243  "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f2 \n\t"
244  "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f1 \n\t"
245  "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t"
246  "lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t"
247  "lwc1 $f7, -8(%[p_in]) \n\t"
248  "addiu %[p_filter_coeffs], 8 \n\t"
249  "addiu %[p_in], -8 \n\t"
250  "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t"
251  "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t"
252  "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t"
253  "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f3 \n\t"
254  "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f2 \n\t"
255  "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f1 \n\t"
256  "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f0 \n\t"
257  "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f7 \n\t"
258  "bgtz %[i], filt_lp_inner%= \n\t"
259 
260  : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
261  [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
262  [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
263  [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
264  [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
265  [p_in]"+r"(p_in), [i]"+r"(i)
266  :
267  : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "memory"
268  );
269 
/* Write the eight finished accumulators back to the output buffer. */
270  out[n+7] = sum_out8;
271  out[n+6] = sum_out7;
272  out[n+5] = sum_out6;
273  out[n+4] = sum_out5;
274  out[n+3] = sum_out4;
275  out[n+2] = sum_out3;
276  out[n+1] = sum_out2;
277  out[n] = sum_out1;
278  }
279 }
280 #endif /* HAVE_INLINE_ASM */
281 
/**
 * Install the MIPS implementations of the CELP filters into a filter context.
 *
 * When HAVE_INLINE_ASM is enabled, the two float filter function pointers of
 * the context are pointed at the inline-assembly versions defined in this
 * file. Otherwise the context is left untouched.
 *
 * @param c filter context whose function pointers are set
 */
282 void ff_celp_filter_init_mips(CELPFContext *c)
283 {
284 #if HAVE_INLINE_ASM
285  c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf_mips;
286  c->celp_lp_zero_synthesis_filterf = ff_celp_lp_zero_synthesis_filterf_mips;
287 #endif
288 }
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
Definition: git-howto.txt:5
Macro definitions for various function/variable attributes.
void(* celp_lp_zero_synthesis_filterf)(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.h:65
#define b
Definition: input.c:42
void(* celp_lp_synthesis_filterf)(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.h:45
synthesis window for stochastic i
common internal and external API header
static double c[64]
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=av_sample_fmt_is_planar(in_fmt);out_planar=av_sample_fmt_is_planar(out_fmt);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_dlog(ac->avr,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> out
#define FFSWAP(type, a, b)
Definition: common.h:61