acelp_filters_mips.c
Go to the documentation of this file.
1  /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Nedeljko Babic (nbabic@mips.com)
30  *
31  * various filters for ACELP-based codecs optimized for MIPS
32  *
33  * This file is part of FFmpeg.
34  *
35  * FFmpeg is free software; you can redistribute it and/or
36  * modify it under the terms of the GNU Lesser General Public
37  * License as published by the Free Software Foundation; either
38  * version 2.1 of the License, or (at your option) any later version.
39  *
40  * FFmpeg is distributed in the hope that it will be useful,
41  * but WITHOUT ANY WARRANTY; without even the implied warranty of
42  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43  * Lesser General Public License for more details.
44  *
45  * You should have received a copy of the GNU Lesser General Public
46  * License along with FFmpeg; if not, write to the Free Software
47  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48  */
49 
50 /**
51  * @file
52  * Reference: libavcodec/acelp_filters.c
53  */
54 #include "config.h"
55 #include "libavutil/attributes.h"
57 
58 #if HAVE_INLINE_ASM
59 static void ff_acelp_interpolatef_mips(float *out, const float *in,
60  const float *filter_coeffs, int precision,
61  int frac_pos, int filter_length, int length)
62 {
63  int n, i;
64  int prec = precision * 4;
65  int fc_offset = precision - frac_pos;
66  float in_val_p, in_val_m, fc_val_p, fc_val_m;
67 
68  for (n = 0; n < length; n++) {
69  /**
70  * four pointers are defined in order to minimize number of
71  * computations done in inner loop
72  */
73  const float *p_in_p = &in[n];
74  const float *p_in_m = &in[n-1];
75  const float *p_filter_coeffs_p = &filter_coeffs[frac_pos];
76  const float *p_filter_coeffs_m = filter_coeffs + fc_offset;
77  float v = 0;
78 
79  for (i = 0; i < filter_length;i++) {
80  __asm__ volatile (
81  "lwc1 %[in_val_p], 0(%[p_in_p]) \n\t"
82  "lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t"
83  "lwc1 %[in_val_m], 0(%[p_in_m]) \n\t"
84  "lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t"
85  "addiu %[p_in_p], %[p_in_p], 4 \n\t"
86  "madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t"
87  "addiu %[p_in_m], %[p_in_m], -4 \n\t"
88  "addu %[p_filter_coeffs_p], %[p_filter_coeffs_p], %[prec] \n\t"
89  "addu %[p_filter_coeffs_m], %[p_filter_coeffs_m], %[prec] \n\t"
90  "madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t"
91 
92  : [v] "=&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
93  [p_filter_coeffs_p] "+r" (p_filter_coeffs_p),
94  [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m),
95  [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m),
96  [p_filter_coeffs_m] "+r" (p_filter_coeffs_m)
97  : [prec] "r" (prec)
98  : "memory"
99  );
100  }
101  out[n] = v;
102  }
103 }
104 
105 static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const float *in,
106  const float zero_coeffs[2],
107  const float pole_coeffs[2],
108  float gain, float mem[2], int n)
109 {
110  /**
111  * loop is unrolled eight times
112  */
113 
114  __asm__ volatile (
115  "lwc1 $f0, 0(%[mem]) \n\t"
116  "blez %[n], ff_acelp_apply_order_2_transfer_function_end%= \n\t"
117  "lwc1 $f1, 4(%[mem]) \n\t"
118  "lwc1 $f2, 0(%[pole_coeffs]) \n\t"
119  "lwc1 $f3, 4(%[pole_coeffs]) \n\t"
120  "lwc1 $f4, 0(%[zero_coeffs]) \n\t"
121  "lwc1 $f5, 4(%[zero_coeffs]) \n\t"
122 
123  "ff_acelp_apply_order_2_transfer_function_madd%=: \n\t"
124 
125  "lwc1 $f6, 0(%[in]) \n\t"
126  "mul.s $f9, $f3, $f1 \n\t"
127  "mul.s $f7, $f2, $f0 \n\t"
128  "msub.s $f7, $f7, %[gain], $f6 \n\t"
129  "sub.s $f7, $f7, $f9 \n\t"
130  "madd.s $f8, $f7, $f4, $f0 \n\t"
131  "madd.s $f8, $f8, $f5, $f1 \n\t"
132  "lwc1 $f11, 4(%[in]) \n\t"
133  "mul.s $f12, $f3, $f0 \n\t"
134  "mul.s $f13, $f2, $f7 \n\t"
135  "msub.s $f13, $f13, %[gain], $f11 \n\t"
136  "sub.s $f13, $f13, $f12 \n\t"
137  "madd.s $f14, $f13, $f4, $f7 \n\t"
138  "madd.s $f14, $f14, $f5, $f0 \n\t"
139  "swc1 $f8, 0(%[out]) \n\t"
140  "lwc1 $f6, 8(%[in]) \n\t"
141  "mul.s $f9, $f3, $f7 \n\t"
142  "mul.s $f15, $f2, $f13 \n\t"
143  "msub.s $f15, $f15, %[gain], $f6 \n\t"
144  "sub.s $f15, $f15, $f9 \n\t"
145  "madd.s $f8, $f15, $f4, $f13 \n\t"
146  "madd.s $f8, $f8, $f5, $f7 \n\t"
147  "swc1 $f14, 4(%[out]) \n\t"
148  "lwc1 $f11, 12(%[in]) \n\t"
149  "mul.s $f12, $f3, $f13 \n\t"
150  "mul.s $f16, $f2, $f15 \n\t"
151  "msub.s $f16, $f16, %[gain], $f11 \n\t"
152  "sub.s $f16, $f16, $f12 \n\t"
153  "madd.s $f14, $f16, $f4, $f15 \n\t"
154  "madd.s $f14, $f14, $f5, $f13 \n\t"
155  "swc1 $f8, 8(%[out]) \n\t"
156  "lwc1 $f6, 16(%[in]) \n\t"
157  "mul.s $f9, $f3, $f15 \n\t"
158  "mul.s $f7, $f2, $f16 \n\t"
159  "msub.s $f7, $f7, %[gain], $f6 \n\t"
160  "sub.s $f7, $f7, $f9 \n\t"
161  "madd.s $f8, $f7, $f4, $f16 \n\t"
162  "madd.s $f8, $f8, $f5, $f15 \n\t"
163  "swc1 $f14, 12(%[out]) \n\t"
164  "lwc1 $f11, 20(%[in]) \n\t"
165  "mul.s $f12, $f3, $f16 \n\t"
166  "mul.s $f13, $f2, $f7 \n\t"
167  "msub.s $f13, $f13, %[gain], $f11 \n\t"
168  "sub.s $f13, $f13, $f12 \n\t"
169  "madd.s $f14, $f13, $f4, $f7 \n\t"
170  "madd.s $f14, $f14, $f5, $f16 \n\t"
171  "swc1 $f8, 16(%[out]) \n\t"
172  "lwc1 $f6, 24(%[in]) \n\t"
173  "mul.s $f9, $f3, $f7 \n\t"
174  "mul.s $f15, $f2, $f13 \n\t"
175  "msub.s $f15, $f15, %[gain], $f6 \n\t"
176  "sub.s $f1, $f15, $f9 \n\t"
177  "madd.s $f8, $f1, $f4, $f13 \n\t"
178  "madd.s $f8, $f8, $f5, $f7 \n\t"
179  "swc1 $f14, 20(%[out]) \n\t"
180  "lwc1 $f11, 28(%[in]) \n\t"
181  "mul.s $f12, $f3, $f13 \n\t"
182  "mul.s $f16, $f2, $f1 \n\t"
183  "msub.s $f16, $f16, %[gain], $f11 \n\t"
184  "sub.s $f0, $f16, $f12 \n\t"
185  "madd.s $f14, $f0, $f4, $f1 \n\t"
186  "madd.s $f14, $f14, $f5, $f13 \n\t"
187  "swc1 $f8, 24(%[out]) \n\t"
188  "addiu %[out], 32 \n\t"
189  "addiu %[in], 32 \n\t"
190  "addiu %[n], -8 \n\t"
191  "swc1 $f14, -4(%[out]) \n\t"
192  "bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t"
193  "swc1 $f1, 4(%[mem]) \n\t"
194  "swc1 $f0, 0(%[mem]) \n\t"
195 
196  "ff_acelp_apply_order_2_transfer_function_end%=: \n\t"
197 
198  : [out] "+r" (out),
199  [in] "+r" (in), [gain] "+f" (gain),
200  [n] "+r" (n), [mem] "+r" (mem)
201  : [zero_coeffs] "r" (zero_coeffs),
202  [pole_coeffs] "r" (pole_coeffs)
203  : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
204  "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
205  "$f12", "$f13", "$f14", "$f15", "$f16", "memory"
206  );
207 }
208 #endif /* HAVE_INLINE_ASM */
209 
211 {
212 #if HAVE_INLINE_ASM
213  c->acelp_interpolatef = ff_acelp_interpolatef_mips;
214  c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function_mips;
215 #endif
216 }
float v
void(* acelp_interpolatef)(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.h:32
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
Definition: git-howto.txt:5
Macro definitions for various function/variable attributes.
int mem
Definition: avisynth_c.h:721
void(* acelp_apply_order_2_transfer_function)(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.h:47
synthesis window for stochastic i
static double c[64]
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=av_sample_fmt_is_planar(in_fmt);out_planar=av_sample_fmt_is_planar(out_fmt);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_dlog(ac->avr,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> out
const char int length
Definition: avisynth_c.h:668
void ff_acelp_filter_init_mips(ACELPFContext *c)