/*
 * MMX optimized DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 */
|
yading@10
|
24
|
yading@10
|
25 #include "libavutil/cpu.h"
|
yading@10
|
26 #include "libavutil/x86/asm.h"
|
yading@10
|
27 #include "libavcodec/hpeldsp.h"
|
yading@10
|
28 #include "dsputil_mmx.h"
|
yading@10
|
29
|
yading@10
|
30 void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
31 ptrdiff_t line_size, int h);
|
yading@10
|
32 void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
33 ptrdiff_t line_size, int h);
|
yading@10
|
34 void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
35 ptrdiff_t line_size, int h);
|
yading@10
|
36 void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
37 ptrdiff_t line_size, int h);
|
yading@10
|
38 void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
39 ptrdiff_t line_size, int h);
|
yading@10
|
40 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
41 ptrdiff_t line_size, int h);
|
yading@10
|
42 void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
|
yading@10
|
43 const uint8_t *pixels,
|
yading@10
|
44 ptrdiff_t line_size, int h);
|
yading@10
|
45 void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
|
yading@10
|
46 const uint8_t *pixels,
|
yading@10
|
47 ptrdiff_t line_size, int h);
|
yading@10
|
48 void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
49 ptrdiff_t line_size, int h);
|
yading@10
|
50 void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
51 ptrdiff_t line_size, int h);
|
yading@10
|
52 void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
53 ptrdiff_t line_size, int h);
|
yading@10
|
54 void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
55 ptrdiff_t line_size, int h);
|
yading@10
|
56 void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
|
yading@10
|
57 const uint8_t *pixels,
|
yading@10
|
58 ptrdiff_t line_size, int h);
|
yading@10
|
59 void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
|
yading@10
|
60 const uint8_t *pixels,
|
yading@10
|
61 ptrdiff_t line_size, int h);
|
yading@10
|
62 void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
63 ptrdiff_t line_size, int h);
|
yading@10
|
64 void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
65 ptrdiff_t line_size, int h);
|
yading@10
|
66 void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
67 ptrdiff_t line_size, int h);
|
yading@10
|
68 void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
69 ptrdiff_t line_size, int h);
|
yading@10
|
70 void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
71 ptrdiff_t line_size, int h);
|
yading@10
|
72 void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
73 ptrdiff_t line_size, int h);
|
yading@10
|
74 void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
|
yading@10
|
75 ptrdiff_t line_size, int h);
|
yading@10
|
76
|
yading@10
|
77
|
yading@10
|
#if HAVE_INLINE_ASM

/* Emit an 8-byte alignment directive before a loop/jump target. */
#define JUMPALIGN() __asm__ volatile (".p2align 3"::)
/* Clear MMX register regd (regd := 0). */
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)

/* Fill regd with 0xfe in every byte: pcmpeqd sets all bits (0xff per byte),
 * and the bytewise add doubles that to 0xfe per byte. */
#define MOVQ_BFE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%"#regd", %%"#regd" \n\t" \
    "paddb %%"#regd", %%"#regd" \n\t" ::)

#ifndef PIC
/* Non-PIC build: load the constants straight from memory.
 * ff_bone / ff_wtwo are presumably declared in dsputil_mmx.h — the
 * definitions are outside this file. */
#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))
#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
#else
// for shared library it's better to use this way for accessing constants
// pcmpeqd -> -1
/* Synthesize 0x01 in every byte without touching memory:
 * all-ones -> psrlw leaves 1 per 16-bit word -> packuswb packs to bytes. */
#define MOVQ_BONE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%"#regd", %%"#regd" \n\t" \
    "psrlw $15, %%"#regd" \n\t" \
    "packuswb %%"#regd", %%"#regd" \n\t" ::)

/* Synthesize 2 in every 16-bit word: all-ones -> 1 per word -> shifted to 2. */
#define MOVQ_WTWO(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%"#regd", %%"#regd" \n\t" \
    "psrlw $15, %%"#regd" \n\t" \
    "psllw $1, %%"#regd" \n\t"::)

#endif
|
yading@10
|

// using regr as temporary and for the output result
// first argument is unmodified and second is trashed
// regfe is supposed to contain 0xfefefefefefefefe
/* Byte-wise average rounding down: (a & b) + (((a ^ b) & 0xfe) >> 1).
 * The 0xfe mask clears each byte's LSB so the quadword shift cannot
 * bleed bits between adjacent bytes. */
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
    "movq "#rega", "#regr" \n\t" \
    "pand "#regb", "#regr" \n\t" \
    "pxor "#rega", "#regb" \n\t" \
    "pand "#regfe", "#regb" \n\t" \
    "psrlq $1, "#regb" \n\t" \
    "paddb "#regb", "#regr" \n\t"

/* Byte-wise average rounding up: (a | b) - (((a ^ b) & 0xfe) >> 1). */
#define PAVGB_MMX(rega, regb, regr, regfe) \
    "movq "#rega", "#regr" \n\t" \
    "por "#regb", "#regr" \n\t" \
    "pxor "#rega", "#regb" \n\t" \
    "pand "#regfe", "#regb" \n\t" \
    "psrlq $1, "#regb" \n\t" \
    "psubb "#regb", "#regr" \n\t"

// mm6 is supposed to contain 0xfefefefefefefefe
/* Two round-down averages at once: rega/regb -> regr, regc/regd -> regp.
 * regb and regd are trashed; rega and regc stay intact. */
#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
    "movq "#rega", "#regr" \n\t" \
    "movq "#regc", "#regp" \n\t" \
    "pand "#regb", "#regr" \n\t" \
    "pand "#regd", "#regp" \n\t" \
    "pxor "#rega", "#regb" \n\t" \
    "pxor "#regc", "#regd" \n\t" \
    "pand %%mm6, "#regb" \n\t" \
    "pand %%mm6, "#regd" \n\t" \
    "psrlq $1, "#regb" \n\t" \
    "psrlq $1, "#regd" \n\t" \
    "paddb "#regb", "#regr" \n\t" \
    "paddb "#regd", "#regp" \n\t"

/* Two round-up averages at once (same register contract as above). */
#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
    "movq "#rega", "#regr" \n\t" \
    "movq "#regc", "#regp" \n\t" \
    "por "#regb", "#regr" \n\t" \
    "por "#regd", "#regp" \n\t" \
    "pxor "#rega", "#regb" \n\t" \
    "pxor "#regc", "#regd" \n\t" \
    "pand %%mm6, "#regb" \n\t" \
    "pand %%mm6, "#regd" \n\t" \
    "psrlq $1, "#regd" \n\t" \
    "psrlq $1, "#regb" \n\t" \
    "psubb "#regb", "#regr" \n\t" \
    "psubb "#regd", "#regp" \n\t"

|
yading@10
|
/***********************************/
/* MMX no rounding */
/* First template instantiation: round-down variants named <x>_no_rnd_<y>_mmx.
 * The template consumes DEF/SET_RND/PAVGBP/PAVGB/OP_AVG.
 * NOTE(review): MOVQ_WONE is not defined in this file — presumably provided
 * by dsputil_mmx.h; confirm against that header. */
#define NO_RND 1
#define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
#define SET_RND MOVQ_WONE
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
#define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)

#include "hpeldsp_rnd_template.c"

#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB
#undef NO_RND
/***********************************/
/* MMX rounding */

/* Second instantiation: normal round-up variants named <x>_<y>_mmx.
 * OP_AVG stays defined from the block above and is undefined with the rest. */
#define DEF(x, y) x ## _ ## y ## _mmx
#define SET_RND MOVQ_WTWO
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)

#include "hpeldsp_rnd_template.c"

#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB
#undef OP_AVG

#endif /* HAVE_INLINE_ASM */
|
yading@10
|
189
|
yading@10
|
190
|
yading@10
|
#if HAVE_YASM
/* The avg template's 8-pixel "put" maps to the MMXEXT assembly version. */
#define ff_put_pixels8_mmx ff_put_pixels8_mmxext

/***********************************/
/* 3Dnow specific */

/* Instantiate the averaging template with a _3dnow suffix. */
#define DEF(x) x ## _3dnow

#include "hpeldsp_avg_template.c"

#undef DEF

/***********************************/
/* MMXEXT specific */

/* Instantiate the averaging template with an _mmxext suffix. */
#define DEF(x) x ## _mmxext

#include "hpeldsp_avg_template.c"

#undef DEF

#endif /* HAVE_YASM */
|
yading@10
|
213
|
yading@10
|
214
|
yading@10
|
#if HAVE_INLINE_ASM
/* Full-pel copies are identical with or without rounding, and the MMXEXT
 * entry points for plain copies simply reuse the MMX implementations. */
#define put_no_rnd_pixels16_mmx put_pixels16_mmx
#define put_no_rnd_pixels8_mmx put_pixels8_mmx
#define put_pixels16_mmxext put_pixels16_mmx
#define put_pixels8_mmxext put_pixels8_mmx
#define put_pixels4_mmxext put_pixels4_mmx
#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx

|
yading@10
|
/* Copy an 8-pixel-wide block of height h from pixels to block.
 * The loop handles 4 rows per iteration (two pairs of two rows), so the
 * "subl $4" countdown implies h must be a multiple of 4. */
static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h)
{
    __asm__ volatile (
        "lea (%3, %3), %%"REG_a" \n\t"   // REG_a = 2 * line_size
        ".p2align 3 \n\t"
        "1: \n\t"
        "movq (%1 ), %%mm0 \n\t"         // load rows 0 and 1
        "movq (%1, %3), %%mm1 \n\t"
        "movq %%mm0, (%2) \n\t"          // store rows 0 and 1
        "movq %%mm1, (%2, %3) \n\t"
        "add %%"REG_a", %1 \n\t"         // advance src and dst by 2 rows
        "add %%"REG_a", %2 \n\t"
        "movq (%1 ), %%mm0 \n\t"         // load rows 2 and 3
        "movq (%1, %3), %%mm1 \n\t"
        "movq %%mm0, (%2) \n\t"          // store rows 2 and 3
        "movq %%mm1, (%2, %3) \n\t"
        "add %%"REG_a", %1 \n\t"
        "add %%"REG_a", %2 \n\t"
        "subl $4, %0 \n\t"               // h -= 4
        "jnz 1b \n\t"
        : "+g"(h), "+r"(pixels), "+r"(block)
        : "r"((x86_reg)line_size)
        : "%"REG_a, "memory"
    );
}
|
yading@10
|
250
|
yading@10
|
/* Copy a 16-pixel-wide block of height h from pixels to block.
 * Each row is moved as two 8-byte movq transfers (mm0/mm4 and mm1/mm5);
 * as above, 4 rows per iteration means h must be a multiple of 4. */
static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h)
{
    __asm__ volatile (
        "lea (%3, %3), %%"REG_a" \n\t"   // REG_a = 2 * line_size
        ".p2align 3 \n\t"
        "1: \n\t"
        "movq (%1 ), %%mm0 \n\t"         // row 0: low and high 8 bytes
        "movq 8(%1 ), %%mm4 \n\t"
        "movq (%1, %3), %%mm1 \n\t"      // row 1
        "movq 8(%1, %3), %%mm5 \n\t"
        "movq %%mm0, (%2) \n\t"
        "movq %%mm4, 8(%2) \n\t"
        "movq %%mm1, (%2, %3) \n\t"
        "movq %%mm5, 8(%2, %3) \n\t"
        "add %%"REG_a", %1 \n\t"         // advance src and dst by 2 rows
        "add %%"REG_a", %2 \n\t"
        "movq (%1 ), %%mm0 \n\t"         // rows 2 and 3
        "movq 8(%1 ), %%mm4 \n\t"
        "movq (%1, %3), %%mm1 \n\t"
        "movq 8(%1, %3), %%mm5 \n\t"
        "movq %%mm0, (%2) \n\t"
        "movq %%mm4, 8(%2) \n\t"
        "movq %%mm1, (%2, %3) \n\t"
        "movq %%mm5, 8(%2, %3) \n\t"
        "add %%"REG_a", %1 \n\t"
        "add %%"REG_a", %2 \n\t"
        "subl $4, %0 \n\t"               // h -= 4
        "jnz 1b \n\t"
        : "+g"(h), "+r"(pixels), "+r"(block)
        : "r"((x86_reg)line_size)
        : "%"REG_a, "memory"
    );
}
#endif /* HAVE_INLINE_ASM */
|
yading@10
|
286
|
yading@10
|
/* 16-pixel-wide SSE2 copy/average, implemented in external assembly. */
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);

/* Fill one row of an hpel function table by token-pasting the function name.
 * Slot 0 = full-pel, 1 = x2 (horizontal), 2 = y2 (vertical), 3 = xy2.
 * IDX is the literal table subscript (e.g. [0]) and may be empty for
 * single-row tables such as avg_no_rnd. */
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
    do { \
        c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
        c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
        c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
        c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
    } while (0)
|
yading@10
|
299
|
yading@10
|
/* Install the inline-asm MMX half-pel functions.  Table index [0] is the
 * 16-pixel-wide row, [1] the 8-pixel-wide row; avg_no_rnd has a single
 * (16-pixel) table, hence the empty IDX argument. */
static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int mm_flags)
{
#if HAVE_INLINE_ASM
    SET_HPEL_FUNCS(put, [0], 16, mmx);
    SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
    SET_HPEL_FUNCS(avg, [0], 16, mmx);
    SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx);
    SET_HPEL_FUNCS(put, [1], 8, mmx);
    SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
    SET_HPEL_FUNCS(avg, [1], 8, mmx);
#endif /* HAVE_INLINE_ASM */
}
|
yading@10
|
312
|
yading@10
|
313 static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
|
yading@10
|
314 {
|
yading@10
|
315 #if HAVE_YASM
|
yading@10
|
316 c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
|
yading@10
|
317 c->put_pixels_tab[0][2] = ff_put_pixels16_y2_mmxext;
|
yading@10
|
318
|
yading@10
|
319 c->avg_pixels_tab[0][0] = ff_avg_pixels16_mmxext;
|
yading@10
|
320 c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_mmxext;
|
yading@10
|
321 c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_mmxext;
|
yading@10
|
322
|
yading@10
|
323 c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
|
yading@10
|
324 c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
|
yading@10
|
325
|
yading@10
|
326 c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
|
yading@10
|
327 c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
|
yading@10
|
328 c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
|
yading@10
|
329
|
yading@10
|
330 if (!(flags & CODEC_FLAG_BITEXACT)) {
|
yading@10
|
331 c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_mmxext;
|
yading@10
|
332 c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_mmxext;
|
yading@10
|
333 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
|
yading@10
|
334 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
|
yading@10
|
335
|
yading@10
|
336 c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_mmxext;
|
yading@10
|
337 c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
|
yading@10
|
338 }
|
yading@10
|
339
|
yading@10
|
340 if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
|
yading@10
|
341 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
|
yading@10
|
342 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
|
yading@10
|
343 }
|
yading@10
|
344 #endif /* HAVE_YASM */
|
yading@10
|
345 }
|
yading@10
|
346
|
yading@10
|
347 static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
|
yading@10
|
348 {
|
yading@10
|
349 #if HAVE_YASM
|
yading@10
|
350 c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
|
yading@10
|
351 c->put_pixels_tab[0][2] = ff_put_pixels16_y2_3dnow;
|
yading@10
|
352
|
yading@10
|
353 c->avg_pixels_tab[0][0] = ff_avg_pixels16_3dnow;
|
yading@10
|
354 c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_3dnow;
|
yading@10
|
355 c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_3dnow;
|
yading@10
|
356
|
yading@10
|
357 c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
|
yading@10
|
358 c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
|
yading@10
|
359
|
yading@10
|
360 c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
|
yading@10
|
361 c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
|
yading@10
|
362 c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
|
yading@10
|
363
|
yading@10
|
364 if (!(flags & CODEC_FLAG_BITEXACT)){
|
yading@10
|
365 c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_3dnow;
|
yading@10
|
366 c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_3dnow;
|
yading@10
|
367 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
|
yading@10
|
368 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
|
yading@10
|
369
|
yading@10
|
370 c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_3dnow;
|
yading@10
|
371 c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
|
yading@10
|
372 }
|
yading@10
|
373
|
yading@10
|
374 if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
|
yading@10
|
375 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
|
yading@10
|
376 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
|
yading@10
|
377 }
|
yading@10
|
378 #endif /* HAVE_YASM */
|
yading@10
|
379 }
|
yading@10
|
380
|
yading@10
|
381 static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
|
yading@10
|
382 {
|
yading@10
|
383 #if HAVE_YASM
|
yading@10
|
384 if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
yading@10
|
385 // these functions are slower than mmx on AMD, but faster on Intel
|
yading@10
|
386 c->put_pixels_tab[0][0] = ff_put_pixels16_sse2;
|
yading@10
|
387 c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
|
yading@10
|
388 c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2;
|
yading@10
|
389 }
|
yading@10
|
390 #endif /* HAVE_YASM */
|
yading@10
|
391 }
|
yading@10
|
392
|
yading@10
|
393 void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
|
yading@10
|
394 {
|
yading@10
|
395 int mm_flags = av_get_cpu_flags();
|
yading@10
|
396
|
yading@10
|
397 if (HAVE_MMX && mm_flags & AV_CPU_FLAG_MMX)
|
yading@10
|
398 hpeldsp_init_mmx(c, flags, mm_flags);
|
yading@10
|
399
|
yading@10
|
400 if (mm_flags & AV_CPU_FLAG_MMXEXT)
|
yading@10
|
401 hpeldsp_init_mmxext(c, flags, mm_flags);
|
yading@10
|
402
|
yading@10
|
403 if (mm_flags & AV_CPU_FLAG_3DNOW)
|
yading@10
|
404 hpeldsp_init_3dnow(c, flags, mm_flags);
|
yading@10
|
405
|
yading@10
|
406 if (mm_flags & AV_CPU_FLAG_SSE2)
|
yading@10
|
407 hpeldsp_init_sse2(c, flags, mm_flags);
|
yading@10
|
408 }
|