x86/mpegaudiodec.c
Go to the documentation of this file.
1 /*
2  * MMX optimized MP3 decoding functions
3  * Copyright (c) 2010 Vitor Sessak
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 #include "libavutil/cpu.h"
24 #include "libavutil/internal.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
28 
/*
 * DECL(CPU) emits two declarations for one instruction-set flavour:
 *   - a forward declaration of the file-local wrapper imdct36_blocks_<CPU>(),
 *     whose definition is generated further down by DECL_IMDCT_BLOCKS();
 *   - a prototype for ff_imdct36_float_<CPU>(), which is not defined in this
 *     file (presumably implemented in external assembly -- it is only called
 *     from code guarded by HAVE_YASM).
 */
29 #define DECL(CPU)\
30 static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
31 void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
32 
/* One pair of declarations per supported flavour. */
33 DECL(sse)
34 DECL(sse2)
35 DECL(sse3)
36 DECL(ssse3)
37 DECL(avx)
38 
/*
 * Four-blocks-at-a-time IMDCT36 variants; besides the arguments of the
 * single-block version they take a caller-provided scratch buffer (tmpbuf).
 * Not defined in this file.
 */
39 void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
40  float *tmpbuf);
41 void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
42  float *tmpbuf);
43 
/*
 * 16-byte aligned window table handed to the four-block IMDCT36 routines.
 * Indexed as [switch-point variant][block type][4 * sample + lane]; the
 * contents are derived from ff_mdct_win_float by the init code at the end of
 * this file.
 */
44 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
45 
46 #if HAVE_SSE2_INLINE
47 
/** Multiply-accumulate: rt += ra * rb. */
#define MACS(rt, ra, rb) ((rt) += (ra) * (rb))
/** Multiply-subtract: rt -= ra * rb. */
#define MLSS(rt, ra, rb) ((rt) -= (ra) * (rb))

/**
 * Apply op (MACS or MLSS) to sum for the eight element pairs
 * (w[64 * k], p[64 * k]), k = 0..7, in increasing order of k.
 *
 * The expansion is wrapped in do { } while (0) so that "SUM8(...);" is
 * exactly one statement and can be used as the unbraced body of an
 * if / else. The trailing semicolon is supplied by the caller.
 *
 * @param op   accumulating macro taking (accumulator, a, b)
 * @param sum  lvalue accumulator, updated in place
 * @param w    first factor array, read at indices 0, 64, ..., 448
 * @param p    second factor array, read at indices 0, 64, ..., 448
 */
#define SUM8(op, sum, w, p)                 \
do {                                        \
    op(sum, (w)[0 * 64], (p)[0 * 64]);      \
    op(sum, (w)[1 * 64], (p)[1 * 64]);      \
    op(sum, (w)[2 * 64], (p)[2 * 64]);      \
    op(sum, (w)[3 * 64], (p)[3 * 64]);      \
    op(sum, (w)[4 * 64], (p)[4 * 64]);      \
    op(sum, (w)[5 * 64], (p)[5 * 64]);      \
    op(sum, (w)[6 * 64], (p)[6 * 64]);      \
    op(sum, (w)[7 * 64], (p)[7 * 64]);      \
} while (0)
62 
63 static void apply_window(const float *buf, const float *win1,
64  const float *win2, float *sum1, float *sum2, int len)
65 {
66  x86_reg count = - 4*len;
67  const float *win1a = win1+len;
68  const float *win2a = win2+len;
69  const float *bufa = buf+len;
70  float *sum1a = sum1+len;
71  float *sum2a = sum2+len;
72 
73 
74 #define MULT(a, b) \
75  "movaps " #a "(%1,%0), %%xmm1 \n\t" \
76  "movaps " #a "(%3,%0), %%xmm2 \n\t" \
77  "mulps %%xmm2, %%xmm1 \n\t" \
78  "subps %%xmm1, %%xmm0 \n\t" \
79  "mulps " #b "(%2,%0), %%xmm2 \n\t" \
80  "subps %%xmm2, %%xmm4 \n\t" \
81 
82  __asm__ volatile(
83  "1: \n\t"
84  "xorps %%xmm0, %%xmm0 \n\t"
85  "xorps %%xmm4, %%xmm4 \n\t"
86 
87  MULT( 0, 0)
88  MULT( 256, 64)
89  MULT( 512, 128)
90  MULT( 768, 192)
91  MULT(1024, 256)
92  MULT(1280, 320)
93  MULT(1536, 384)
94  MULT(1792, 448)
95 
96  "movaps %%xmm0, (%4,%0) \n\t"
97  "movaps %%xmm4, (%5,%0) \n\t"
98  "add $16, %0 \n\t"
99  "jl 1b \n\t"
100  :"+&r"(count)
101  :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
102  );
103 
104 #undef MULT
105 }
106 
/**
 * Windowing step producing 32 output samples, written to out with a stride
 * of incr floats.
 *
 * With A = in + 16 and B = in + 32 the two apply_window() calls yield, for
 * i = 0..15 (sums over k = 0..7):
 *     suma[i] = -sum win[i + 64k]       * A[i + 64k]
 *     sumc[i] = -sum win[512 + i + 16k] * A[i + 64k]
 *     sumb[i] = -sum win[48 + i + 64k]  * B[i + 64k]
 *     sumd[i] = -sum win[640 + i + 16k] * B[i + 64k]
 * and the outputs are
 *     out[0]               = -suma[0]
 *     out[j * incr]        = -suma[j] + sumd[16 - j]      (j = 1..15)
 *     out[(32 - j) * incr] =  sumb[16 - j] + sumc[j]      (j = 1..15)
 *     out[16 * incr]       = -sum win[48 + 64k] * in[32 + 64k]
 *
 * Side effect: the first 32 floats of in are copied to in + 512.
 *
 * Alignment: in is accessed with movaps and must be 16-byte aligned; the
 * incr == 1 path also stores to out with movaps.
 *
 * @param in      input sample buffer (also written at in[512..543])
 * @param win     window coefficients
 * @param unused  not referenced by this implementation
 * @param out     destination samples
 * @param incr    distance in floats between consecutive output samples
 */
107 static void apply_window_mp3(float *in, float *win, int *unused, float *out,
108  int incr)
109 {
 /* 17 entries each: apply_window() fills [0..15]; index 16 exists so that the
  * reversed 4-wide loads below (byte offset 52 = floats 13..16) stay inside
  * the arrays. */
110  LOCAL_ALIGNED_16(float, suma, [17]);
111  LOCAL_ALIGNED_16(float, sumb, [17]);
112  LOCAL_ALIGNED_16(float, sumc, [17]);
113  LOCAL_ALIGNED_16(float, sumd, [17]);
114 
115  float sum;
116 
 /* Duplicate in[0..31] (128 bytes) at in[512..543] so the strided reads
  * below can run past index 511 without wrapping. */
117  /* copy to avoid wrap */
118  __asm__ volatile(
119  "movaps 0(%0), %%xmm0 \n\t" \
120  "movaps 16(%0), %%xmm1 \n\t" \
121  "movaps 32(%0), %%xmm2 \n\t" \
122  "movaps 48(%0), %%xmm3 \n\t" \
123  "movaps %%xmm0, 0(%1) \n\t" \
124  "movaps %%xmm1, 16(%1) \n\t" \
125  "movaps %%xmm2, 32(%1) \n\t" \
126  "movaps %%xmm3, 48(%1) \n\t" \
127  "movaps 64(%0), %%xmm0 \n\t" \
128  "movaps 80(%0), %%xmm1 \n\t" \
129  "movaps 96(%0), %%xmm2 \n\t" \
130  "movaps 112(%0), %%xmm3 \n\t" \
131  "movaps %%xmm0, 64(%1) \n\t" \
132  "movaps %%xmm1, 80(%1) \n\t" \
133  "movaps %%xmm2, 96(%1) \n\t" \
134  "movaps %%xmm3, 112(%1) \n\t"
135  ::"r"(in), "r"(in+512)
136  :"memory"
137  );
138 
 /* Sixteen partial sums per array; see the formulas in the header comment. */
139  apply_window(in + 16, win , win + 512, suma, sumc, 16);
140  apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
141 
 /* Extra term folded into suma[0]: += sum win[32 + 64k] * in[48 + 64k]. */
142  SUM8(MACS, suma[0], win + 32, in + 48);
143 
 /* sumd[16] is the lane subtracted against suma[0] in the first SUMS group,
  * so zeroing it makes out[0] = -suma[0] there, matching the scalar path. */
144  sumc[ 0] = 0;
145  sumb[16] = 0;
146  sumd[16] = 0;
147 
 /* SUMS emits asm for two groups of four outputs (all arguments are byte
  * offsets):
  *   out1: reverse(sumd[4 floats, unaligned]) - suma[4 floats]
  *   out2: reverse(sumc[4 floats, unaligned]) + sumb[4 floats]
  * shufps $0x1b reverses the order of the four lanes. */
148 #define SUMS(suma, sumb, sumc, sumd, out1, out2) \
149  "movups " #sumd "(%4), %%xmm0 \n\t" \
150  "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
151  "subps " #suma "(%1), %%xmm0 \n\t" \
152  "movaps %%xmm0," #out1 "(%0) \n\t" \
153 \
154  "movups " #sumc "(%3), %%xmm0 \n\t" \
155  "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
156  "addps " #sumb "(%2), %%xmm0 \n\t" \
157  "movaps %%xmm0," #out2 "(%0) \n\t"
158 
 /* Contiguous output: write out[0..31] with vector stores. The lane that
  * lands on out[16] is overwritten by the final scalar store below. */
159  if (incr == 1) {
160  __asm__ volatile(
161  SUMS( 0, 48, 4, 52, 0, 112)
162  SUMS(16, 32, 20, 36, 16, 96)
163  SUMS(32, 16, 36, 20, 32, 80)
164  SUMS(48, 0, 52, 4, 48, 64)
165 
166  :"+&r"(out)
167  :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
168  :"memory"
169  );
170  out += 16*incr;
171  } else {
 /* Strided output: out walks forward from sample 0, out2 walks backward
  * from sample 31; after the loop out points at sample 16. */
172  int j;
173  float *out2 = out + 32 * incr;
174  out[0 ] = -suma[ 0];
175  out += incr;
176  out2 -= incr;
177  for(j=1;j<16;j++) {
178  *out = -suma[ j] + sumd[16-j];
179  *out2 = sumb[16-j] + sumc[ j];
180  out += incr;
181  out2 -= incr;
182  }
183  }
184 
 /* Middle sample (index 16): a single negated 8-tap sum. */
185  sum = 0;
186  SUM8(MLSS, sum, win + 16 + 32, in + 32);
187  *out = sum;
188 }
189 
190 #endif /* HAVE_SSE2_INLINE */
191 
/*
 * DECL_IMDCT_BLOCKS(CPU1, CPU2) defines the file-local function
 * imdct36_blocks_<CPU1>(out, buf, in, count, switch_point, block_type).
 *
 * The generated function processes `count` blocks in two phases:
 *   1. Groups of four blocks (count rounded down to a multiple of 4) go
 *      through ff_four_imdct36_float_<CPU2>() with a window taken from
 *      mdct_win_sse[switch_point && j < 4][block_type] and a 16-byte aligned
 *      scratch buffer of 1024 floats. After each group, in and buf advance
 *      by 4*18 floats and out by 4.
 *   2. The remaining (count & 3) blocks go through
 *      ff_imdct36_float_<CPU1>() one at a time. The window is
 *      ff_mdct_win_float[win_idx + 4] for odd j and
 *      ff_mdct_win_float[win_idx] for even j, where win_idx is 0 when
 *      switch_point is set and j < 2, and block_type otherwise. After each
 *      block, in advances by 18 floats and buf and out by one.
 */
192 #if HAVE_YASM
193 #define DECL_IMDCT_BLOCKS(CPU1, CPU2) \
194 static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
195  int count, int switch_point, int block_type) \
196 { \
197  int align_end = count - (count & 3); \
198  int j; \
199  for (j = 0; j < align_end; j+= 4) { \
200  LOCAL_ALIGNED_16(float, tmpbuf, [1024]); \
201  float *win = mdct_win_sse[switch_point && j < 4][block_type]; \
202  /* apply window & overlap with previous buffer */ \
203  \
204  /* select window */ \
205  ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf); \
206  in += 4*18; \
207  buf += 4*18; \
208  out += 4; \
209  } \
210  for (; j < count; j++) { \
211  /* apply window & overlap with previous buffer */ \
212  \
213  /* select window */ \
214  int win_idx = (switch_point && j < 2) ? 0 : block_type; \
215  float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))]; \
216  \
217  ff_imdct36_float_ ## CPU1(out, buf, in, win); \
218  \
219  in += 18; \
220  buf++; \
221  out++; \
222  } \
223 }
224 
/* All SSE-family wrappers share the SSE four-block routine and differ only
 * in the single-block routine used for the tail. */
225 #if HAVE_SSE
226 DECL_IMDCT_BLOCKS(sse,sse)
227 DECL_IMDCT_BLOCKS(sse2,sse)
228 DECL_IMDCT_BLOCKS(sse3,sse)
229 DECL_IMDCT_BLOCKS(ssse3,sse)
230 #endif
/* The AVX wrapper uses the AVX routine for both phases. */
231 #if HAVE_AVX_EXTERNAL
232 DECL_IMDCT_BLOCKS(avx,avx)
233 #endif
234 #endif /* HAVE_YASM */
235 
237 {
238  int mm_flags = av_get_cpu_flags();
239 
240  int i, j;
241  for (j = 0; j < 4; j++) {
242  for (i = 0; i < 40; i ++) {
243  mdct_win_sse[0][j][4*i ] = ff_mdct_win_float[j ][i];
244  mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
245  mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j ][i];
246  mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
247  mdct_win_sse[1][j][4*i ] = ff_mdct_win_float[0 ][i];
248  mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4 ][i];
249  mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j ][i];
250  mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
251  }
252  }
253 
254 #if HAVE_SSE2_INLINE
255  if (mm_flags & AV_CPU_FLAG_SSE2) {
257  }
258 #endif /* HAVE_SSE2_INLINE */
259 
260 #if HAVE_YASM
261  if (EXTERNAL_AVX(mm_flags)) {
262  s->imdct36_blocks_float = imdct36_blocks_avx;
263  } else if (EXTERNAL_SSSE3(mm_flags)) {
264  s->imdct36_blocks_float = imdct36_blocks_ssse3;
265  } else if (EXTERNAL_SSE3(mm_flags)) {
266  s->imdct36_blocks_float = imdct36_blocks_sse3;
267  } else if (EXTERNAL_SSE2(mm_flags)) {
268  s->imdct36_blocks_float = imdct36_blocks_sse2;
269  } else if (EXTERNAL_SSE(mm_flags)) {
270  s->imdct36_blocks_float = imdct36_blocks_sse;
271  }
272 #endif /* HAVE_YASM */
273 }
const char * s
Definition: avisynth_c.h:668
static void apply_window_mp3(float *in, float *win, int *unused, float *out, int incr)
#define MULT(a, b)
av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
Definition: git-howto.txt:5
static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride)
#define EXTERNAL_SSE(flags)
Definition: x86/cpu.h:35
float ff_mdct_win_float[8][MDCT_BUF_SIZE]
Macro definitions for various function/variable attributes.
void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win, float *tmpbuf)
#define MACS(rt, ra, rb)
#define av_cold
Definition: attributes.h:78
#define EXTERNAL_SSE3(flags)
Definition: x86/cpu.h:37
#define EXTERNAL_SSE2(flags)
Definition: x86/cpu.h:36
void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win, float *tmpbuf)
#define SUM8(op, sum, w, p)
common internal API header
void(* imdct36_blocks_float)(float *out, float *buf, float *in, int count, int switch_point, int block_type)
Definition: mpegaudiodsp.h:33
static void(*const apply_window[4])(AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio)
Definition: aacenc.c:240
#define MLSS(rt, ra, rb)
void * buf
Definition: avisynth_c.h:594
#define EXTERNAL_SSSE3(flags)
Definition: x86/cpu.h:38
synthesis window for stochastic i
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:30
#define DECL(CPU)
DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4 *40]
int x86_reg
void(* apply_window_float)(float *synth_buf, float *window, int *dither_state, float *samples, int incr)
Definition: mpegaudiodsp.h:26
int len
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:34
#define LOCAL_ALIGNED_16(t, v,...)
void INT64 INT64 count
Definition: avisynth_c.h:594
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=av_sample_fmt_is_planar(in_fmt);out_planar=av_sample_fmt_is_planar(out_fmt);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_dlog(ac->avr,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> out
#define EXTERNAL_AVX(flags)
Definition: x86/cpu.h:41