fmtconvert_init.c
Go to the documentation of this file.
1 /*
2  * Format Conversion Utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/x86/asm.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/fmtconvert.h"
30 
31 #if HAVE_YASM
32 
/*
 * Prototypes of the conversion kernels used below. They are declared here
 * but defined outside this file; the whole section is only compiled when
 * HAVE_YASM is set. Grouped by instruction-set extension.
 */

/* 3DNow! */
void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);

/* 3DNowExt (only a 6-channel interleaving kernel is declared) */
void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);

/* SSE */
void ff_int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len);
void ff_float_to_int16_sse(int16_t *dst, const float *src, long len);
void ff_float_to_int16_step_sse(int16_t *dst, const float *src, long len, long step);
void ff_float_to_int16_interleave2_sse(int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);

/* SSE2 */
void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
void ff_float_to_int16_sse2(int16_t *dst, const float *src, long len);
void ff_float_to_int16_step_sse2(int16_t *dst, const float *src, long len, long step);
void ff_float_to_int16_interleave2_sse2(int16_t *dst, const float **src, long len);

/* No separate SSE2 6-channel kernel is declared: the SSE2 name resolves to
 * the SSE one so that the FLOAT_TO_INT16_INTERLEAVE(sse2) expansion links. */
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
53 
54 #define FLOAT_TO_INT16_INTERLEAVE(cpu) \
55 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
56 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
57  int c;\
58  for(c=0; c<channels; c++){\
59  ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
60  }\
61 }\
62 \
63 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
64  if(channels==1)\
65  ff_float_to_int16_##cpu(dst, src[0], len);\
66  else if(channels==2){\
67  ff_float_to_int16_interleave2_##cpu(dst, src, len);\
68  }else if(channels==6){\
69  ff_float_to_int16_interleave6_##cpu(dst, src, len);\
70  }else\
71  float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
72 }
73 
74 FLOAT_TO_INT16_INTERLEAVE(3dnow)
75 FLOAT_TO_INT16_INTERLEAVE(sse)
76 FLOAT_TO_INT16_INTERLEAVE(sse2)
77 
/**
 * 3DNowExt float -> interleaved int16 dispatcher.
 *
 * Only the 6-channel case uses a 3DNowExt kernel; every other channel
 * count is forwarded unchanged to the plain 3DNow! dispatcher.
 *
 * @param dst      destination buffer for the interleaved int16 samples
 * @param src      array of per-channel float source pointers
 * @param len      length argument handed through to the selected kernel
 * @param channels number of channels to interleave
 */
static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
                                               long len, int channels)
{
    if (channels != 6) {
        float_to_int16_interleave_3dnow(dst, src, len, channels);
        return;
    }

    ff_float_to_int16_interleave6_3dnowext(dst, src, len);
}
86 
/*
 * Prototypes of the float interleaving kernels (2- and 6-channel variants),
 * declared here and defined outside this file. Grouped by instruction set.
 */

/* MMX */
void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);

/* SSE */
void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
92 
/**
 * MMX float interleaving dispatcher.
 *
 * Uses the dedicated MMX kernel for 2 or 6 channels and the C
 * implementation (ff_float_interleave_c) for any other channel count.
 *
 * @param dst      destination buffer for the interleaved floats
 * @param src      array of per-channel float source pointers
 * @param len      length argument handed through to the selected routine
 * @param channels number of channels to interleave
 */
static void float_interleave_mmx(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_mmx(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_mmx(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
103 
/**
 * SSE float interleaving dispatcher.
 *
 * Uses the dedicated SSE kernel for 2 or 6 channels and the C
 * implementation (ff_float_interleave_c) for any other channel count.
 *
 * @param dst      destination buffer for the interleaved floats
 * @param src      array of per-channel float source pointers
 * @param len      length argument handed through to the selected routine
 * @param channels number of channels to interleave
 */
static void float_interleave_sse(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_sse(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_sse(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
114 #endif /* HAVE_YASM */
115 
117 {
118 #if HAVE_YASM
119  int mm_flags = av_get_cpu_flags();
120 
121  if (EXTERNAL_MMX(mm_flags)) {
122  c->float_interleave = float_interleave_mmx;
123 
124  if (EXTERNAL_AMD3DNOW(mm_flags)) {
125  if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
126  c->float_to_int16 = ff_float_to_int16_3dnow;
127  c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
128  }
129  }
130  if (EXTERNAL_AMD3DNOWEXT(mm_flags)) {
131  if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
132  c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
133  }
134  }
135  if (EXTERNAL_SSE(mm_flags)) {
136  c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
137  c->float_to_int16 = ff_float_to_int16_sse;
138  c->float_to_int16_interleave = float_to_int16_interleave_sse;
139  c->float_interleave = float_interleave_sse;
140  }
141  if (EXTERNAL_SSE2(mm_flags)) {
142  c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
143  c->float_to_int16 = ff_float_to_int16_sse2;
144  c->float_to_int16_interleave = float_to_int16_interleave_sse2;
145  }
146  }
147 #endif /* HAVE_YASM */
148 }
#define EXTERNAL_MMX(flags)
Definition: x86/cpu.h:33
av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
void(* int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len)
Convert an array of int32_t to float and multiply by a float value.
Definition: fmtconvert.h:38
void(* float_interleave)(float *dst, const float **src, unsigned int len, int channels)
Convert multiple arrays of float to an array of interleaved float.
Definition: fmtconvert.h:83
static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride)
#define EXTERNAL_SSE(flags)
Definition: x86/cpu.h:35
Macro definitions for various function/variable attributes.
#define av_cold
Definition: attributes.h:78
void ff_float_interleave_c(float *dst, const float **src, unsigned int len, int channels)
Definition: fmtconvert.c:60
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
#define EXTERNAL_SSE2(flags)
Definition: x86/cpu.h:36
void(* float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels)
Convert multiple arrays of float to an interleaved array of int16_t.
Definition: fmtconvert.h:69
int flags
CODEC_FLAG_*.
void(* float_to_int16)(int16_t *dst, const float *src, long len)
Convert an array of float to an array of int16_t.
Definition: fmtconvert.h:53
#define EXTERNAL_AMD3DNOWEXT(flags)
Definition: x86/cpu.h:32
AVS_Value src
Definition: avisynth_c.h:523
main external API structure.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:30
static double c[64]
int len
else dst[i][x+y *dst_stride[i]]
Definition: vf_mcdeint.c:160
#define EXTERNAL_AMD3DNOW(flags)
Definition: x86/cpu.h:31
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step