annotate ffmpeg/libavcodec/x86/hpeldsp_init.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * MMX optimized DSP utils
yading@10 3 * Copyright (c) 2000, 2001 Fabrice Bellard
yading@10 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
yading@10 5 *
yading@10 6 * This file is part of FFmpeg.
yading@10 7 *
yading@10 8 * FFmpeg is free software; you can redistribute it and/or
yading@10 9 * modify it under the terms of the GNU Lesser General Public
yading@10 10 * License as published by the Free Software Foundation; either
yading@10 11 * version 2.1 of the License, or (at your option) any later version.
yading@10 12 *
yading@10 13 * FFmpeg is distributed in the hope that it will be useful,
yading@10 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 16 * Lesser General Public License for more details.
yading@10 17 *
yading@10 18 * You should have received a copy of the GNU Lesser General Public
yading@10 19 * License along with FFmpeg; if not, write to the Free Software
yading@10 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 21 *
yading@10 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
yading@10 23 */
yading@10 24
yading@10 25 #include "libavutil/cpu.h"
yading@10 26 #include "libavutil/x86/asm.h"
yading@10 27 #include "libavcodec/hpeldsp.h"
yading@10 28 #include "dsputil_mmx.h"
yading@10 29
/*
 * Prototypes of half-pel put/avg routines carrying the _mmxext and _3dnow
 * suffixes. Only declarations appear here; no bodies for these names are
 * visible in this file (presumably they live in external assembly -- confirm).
 * All share one signature: destination block, source pixels, a line_size
 * stride and a row count h.
 */
yading@10 30 void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 31 ptrdiff_t line_size, int h);
yading@10 32 void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 33 ptrdiff_t line_size, int h);
yading@10 34 void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 35 ptrdiff_t line_size, int h);
yading@10 36 void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 37 ptrdiff_t line_size, int h);
yading@10 38 void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 39 ptrdiff_t line_size, int h);
yading@10 40 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 41 ptrdiff_t line_size, int h);
/* "_exact" variants: selected by the init code only when bit-exact output is requested. */
yading@10 42 void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
yading@10 43 const uint8_t *pixels,
yading@10 44 ptrdiff_t line_size, int h);
yading@10 45 void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
yading@10 46 const uint8_t *pixels,
yading@10 47 ptrdiff_t line_size, int h);
yading@10 48 void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 49 ptrdiff_t line_size, int h);
yading@10 50 void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 51 ptrdiff_t line_size, int h);
yading@10 52 void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 53 ptrdiff_t line_size, int h);
yading@10 54 void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 55 ptrdiff_t line_size, int h);
yading@10 56 void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
yading@10 57 const uint8_t *pixels,
yading@10 58 ptrdiff_t line_size, int h);
yading@10 59 void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
yading@10 60 const uint8_t *pixels,
yading@10 61 ptrdiff_t line_size, int h);
yading@10 62 void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 63 ptrdiff_t line_size, int h);
yading@10 64 void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 65 ptrdiff_t line_size, int h);
yading@10 66 void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 67 ptrdiff_t line_size, int h);
yading@10 68 void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 69 ptrdiff_t line_size, int h);
yading@10 70 void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 71 ptrdiff_t line_size, int h);
yading@10 72 void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
yading@10 73 ptrdiff_t line_size, int h);
yading@10 74 void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
yading@10 75 ptrdiff_t line_size, int h);
yading@10 76
yading@10 77
yading@10 78 #if HAVE_INLINE_ASM
yading@10 79
/* Emit a ".p2align 3" directive, i.e. align the next instruction to 2^3 = 8 bytes. */
yading@10 80 #define JUMPALIGN() __asm__ volatile (".p2align 3"::)
/* Clear MMX register regd by XOR-ing it with itself. */
yading@10 81 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
yading@10 82
/*
 * Fill every byte of regd with 0xFE without touching memory:
 * pcmpeqd reg,reg sets all bits, then the byte-wise self-add 0xFF + 0xFF
 * wraps to 0xFE. The result is the mask the PAVGB_* macros expect.
 */
yading@10 83 #define MOVQ_BFE(regd) \
yading@10 84 __asm__ volatile ( \
yading@10 85 "pcmpeqd %%"#regd", %%"#regd" \n\t" \
yading@10 86 "paddb %%"#regd", %%"#regd" \n\t" ::)
yading@10 87
/*
 * MOVQ_BONE / MOVQ_WTWO load a constant into MMX register regd.
 * Non-PIC builds read the constants ff_bone / ff_wtwo from memory
 * (their values are defined outside this file). PIC builds compute the
 * constant in the register instead, avoiding a data reference.
 */
yading@10 88 #ifndef PIC
yading@10 89 #define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))
yading@10 90 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
yading@10 91 #else
yading@10 92 // for shared library it's better to use this way for accessing constants
yading@10 93 // pcmpeqd -> -1
/*
 * All-ones -> each 16-bit word shifted right by 15 becomes 1 -> packuswb
 * narrows the words to bytes, leaving 0x01 in every byte.
 */
yading@10 94 #define MOVQ_BONE(regd) \
yading@10 95 __asm__ volatile ( \
yading@10 96 "pcmpeqd %%"#regd", %%"#regd" \n\t" \
yading@10 97 "psrlw $15, %%"#regd" \n\t" \
yading@10 98 "packuswb %%"#regd", %%"#regd" \n\t" ::)
yading@10 99
/*
 * All-ones -> each 16-bit word becomes 1 -> shifted left by one, so every
 * word holds 2.
 */
yading@10 100 #define MOVQ_WTWO(regd) \
yading@10 101 __asm__ volatile ( \
yading@10 102 "pcmpeqd %%"#regd", %%"#regd" \n\t" \
yading@10 103 "psrlw $15, %%"#regd" \n\t" \
yading@10 104 "psllw $1, %%"#regd" \n\t"::)
yading@10 105
yading@10 106 #endif
yading@10 107
/*
 * Byte-wise average of two MMX registers, built from plain MMX logic ops.
 * Both macros expand to asm text (not a full asm statement), so they are
 * meant to be pasted inside an __asm__ block.
 *
 * NO_RND: regr = (a & b) + (((a ^ b) & 0xFE) >> 1)   -> rounds down
 * RND:    regr = (a | b) - (((a ^ b) & 0xFE) >> 1)   -> rounds up
 *
 * The 0xFE mask clears the low bit of every byte before the 64-bit psrlq,
 * so no bit is shifted across a byte boundary.
 */
yading@10 108 // using regr as temporary and for the output result
yading@10 109 // first argument is unmodified and second is trashed
yading@10 110 // regfe is supposed to contain 0xfefefefefefefefe
yading@10 111 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
yading@10 112 "movq "#rega", "#regr" \n\t" \
yading@10 113 "pand "#regb", "#regr" \n\t" \
yading@10 114 "pxor "#rega", "#regb" \n\t" \
yading@10 115 "pand "#regfe", "#regb" \n\t" \
yading@10 116 "psrlq $1, "#regb" \n\t" \
yading@10 117 "paddb "#regb", "#regr" \n\t"
yading@10 118
yading@10 119 #define PAVGB_MMX(rega, regb, regr, regfe) \
yading@10 120 "movq "#rega", "#regr" \n\t" \
yading@10 121 "por "#regb", "#regr" \n\t" \
yading@10 122 "pxor "#rega", "#regb" \n\t" \
yading@10 123 "pand "#regfe", "#regb" \n\t" \
yading@10 124 "psrlq $1, "#regb" \n\t" \
yading@10 125 "psubb "#regb", "#regr" \n\t"
yading@10 126
/*
 * Paired versions of the byte-average macros: two independent averages are
 * interleaved in one expansion.
 *   (rega, regb) -> regr      (regc, regd) -> regp
 * rega and regc are left unmodified; regb and regd are overwritten.
 * The 0xFE mask is not a parameter here: it is read from %%mm6, which the
 * caller must have loaded beforehand.
 * NO_RND uses AND/add (rounds down); the plain variant uses OR/subtract
 * (rounds up).
 */
yading@10 127 // mm6 is supposed to contain 0xfefefefefefefefe
yading@10 128 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
yading@10 129 "movq "#rega", "#regr" \n\t" \
yading@10 130 "movq "#regc", "#regp" \n\t" \
yading@10 131 "pand "#regb", "#regr" \n\t" \
yading@10 132 "pand "#regd", "#regp" \n\t" \
yading@10 133 "pxor "#rega", "#regb" \n\t" \
yading@10 134 "pxor "#regc", "#regd" \n\t" \
yading@10 135 "pand %%mm6, "#regb" \n\t" \
yading@10 136 "pand %%mm6, "#regd" \n\t" \
yading@10 137 "psrlq $1, "#regb" \n\t" \
yading@10 138 "psrlq $1, "#regd" \n\t" \
yading@10 139 "paddb "#regb", "#regr" \n\t" \
yading@10 140 "paddb "#regd", "#regp" \n\t"
yading@10 141
yading@10 142 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
yading@10 143 "movq "#rega", "#regr" \n\t" \
yading@10 144 "movq "#regc", "#regp" \n\t" \
yading@10 145 "por "#regb", "#regr" \n\t" \
yading@10 146 "por "#regd", "#regp" \n\t" \
yading@10 147 "pxor "#rega", "#regb" \n\t" \
yading@10 148 "pxor "#regc", "#regd" \n\t" \
yading@10 149 "pand %%mm6, "#regb" \n\t" \
yading@10 150 "pand %%mm6, "#regd" \n\t" \
yading@10 151 "psrlq $1, "#regd" \n\t" \
yading@10 152 "psrlq $1, "#regb" \n\t" \
yading@10 153 "psubb "#regb", "#regr" \n\t" \
yading@10 154 "psubb "#regd", "#regp" \n\t"
yading@10 155
/*
 * hpeldsp_rnd_template.c is included twice with different macro settings.
 * The template's contents are not visible in this file.
 *
 * First inclusion: NO_RND is defined, DEF(x, y) pastes names of the form
 * x_no_rnd_y_mmx, and PAVGB/PAVGBP map to the round-down macros.
 * NOTE(review): MOVQ_WONE (the SET_RND value here) is not defined in this
 * file -- confirm it comes from an included header or is never expanded.
 */
yading@10 156 /***********************************/
yading@10 157 /* MMX no rounding */
yading@10 158 #define NO_RND 1
yading@10 159 #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
yading@10 160 #define SET_RND MOVQ_WONE
yading@10 161 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
yading@10 162 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
yading@10 163 #define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)
yading@10 164
yading@10 165 #include "hpeldsp_rnd_template.c"
yading@10 166
/* OP_AVG is deliberately not undefined here: it stays in effect for the second inclusion. */
yading@10 167 #undef DEF
yading@10 168 #undef SET_RND
yading@10 169 #undef PAVGBP
yading@10 170 #undef PAVGB
yading@10 171 #undef NO_RND
/*
 * Second inclusion: NO_RND is no longer defined, DEF(x, y) yields x_y_mmx,
 * SET_RND becomes MOVQ_WTWO and PAVGB/PAVGBP map to the round-up macros.
 */
yading@10 172 /***********************************/
yading@10 173 /* MMX rounding */
yading@10 174
yading@10 175 #define DEF(x, y) x ## _ ## y ## _mmx
yading@10 176 #define SET_RND MOVQ_WTWO
yading@10 177 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
yading@10 178 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
yading@10 179
yading@10 180 #include "hpeldsp_rnd_template.c"
yading@10 181
yading@10 182 #undef DEF
yading@10 183 #undef SET_RND
yading@10 184 #undef PAVGBP
yading@10 185 #undef PAVGB
yading@10 186 #undef OP_AVG
yading@10 187
yading@10 188 #endif /* HAVE_INLINE_ASM */
yading@10 189
yading@10 190
/*
 * Built only when HAVE_YASM is set. hpeldsp_avg_template.c is included
 * once per instruction-set suffix; DEF(x) appends _3dnow, then _mmxext, to
 * the names the template generates. The template's contents are not
 * visible in this file.
 */
yading@10 191 #if HAVE_YASM
/*
 * From here on the name ff_put_pixels8_mmx is rewritten to
 * ff_put_pixels8_mmxext, including inside the templates included below.
 */
yading@10 192 #define ff_put_pixels8_mmx ff_put_pixels8_mmxext
yading@10 193
yading@10 194 /***********************************/
yading@10 195 /* 3Dnow specific */
yading@10 196
yading@10 197 #define DEF(x) x ## _3dnow
yading@10 198
yading@10 199 #include "hpeldsp_avg_template.c"
yading@10 200
yading@10 201 #undef DEF
yading@10 202
yading@10 203 /***********************************/
yading@10 204 /* MMXEXT specific */
yading@10 205
yading@10 206 #define DEF(x) x ## _mmxext
yading@10 207
yading@10 208 #include "hpeldsp_avg_template.c"
yading@10 209
yading@10 210 #undef DEF
yading@10 211
yading@10 212 #endif /* HAVE_YASM */
yading@10 213
yading@10 214
yading@10 215 #if HAVE_INLINE_ASM
/*
 * Name aliases so that several table entries share one implementation.
 * The no_rnd full-pel names map to put_pixels{16,8}_mmx, which are plain
 * copies (defined just below), so no rounding is involved. The _mmxext
 * names are mapped back onto the _mmx ones.
 * NOTE(review): put_pixels4_mmx is not defined in the visible part of this
 * file; the alias is harmless unless put_pixels4_mmxext is referenced.
 */
yading@10 216 #define put_no_rnd_pixels16_mmx put_pixels16_mmx
yading@10 217 #define put_no_rnd_pixels8_mmx put_pixels8_mmx
yading@10 218 #define put_pixels16_mmxext put_pixels16_mmx
yading@10 219 #define put_pixels8_mmxext put_pixels8_mmx
yading@10 220 #define put_pixels4_mmxext put_pixels4_mmx
yading@10 221 #define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
yading@10 222 #define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
yading@10 223
/**
 * Copy an 8-byte-wide block of h rows from pixels to block using MMX moves.
 *
 * @param block     destination; advanced by line_size bytes per row
 * @param pixels    source; advanced by line_size bytes per row
 * @param line_size byte distance between consecutive rows, applied to both
 *                  source and destination
 * @param h         number of rows. Each loop iteration copies 4 rows and the
 *                  loop ends only when the counter reaches exactly zero, so
 *                  h must be a positive multiple of 4.
 *
 * mm0 and mm1 are overwritten; no emms is issued here.
 */
yading@10 224 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
yading@10 225 ptrdiff_t line_size, int h)
yading@10 226 {
yading@10 227 __asm__ volatile (
/* REG_a = 2 * line_size: the pointer step after each pair of rows */
yading@10 228 "lea (%3, %3), %%"REG_a" \n\t"
yading@10 229 ".p2align 3 \n\t"
yading@10 230 "1: \n\t"
/* rows 0 and 1 */
yading@10 231 "movq (%1 ), %%mm0 \n\t"
yading@10 232 "movq (%1, %3), %%mm1 \n\t"
yading@10 233 "movq %%mm0, (%2) \n\t"
yading@10 234 "movq %%mm1, (%2, %3) \n\t"
yading@10 235 "add %%"REG_a", %1 \n\t"
yading@10 236 "add %%"REG_a", %2 \n\t"
/* rows 2 and 3 */
yading@10 237 "movq (%1 ), %%mm0 \n\t"
yading@10 238 "movq (%1, %3), %%mm1 \n\t"
yading@10 239 "movq %%mm0, (%2) \n\t"
yading@10 240 "movq %%mm1, (%2, %3) \n\t"
yading@10 241 "add %%"REG_a", %1 \n\t"
yading@10 242 "add %%"REG_a", %2 \n\t"
/* four rows done; loop until the row counter hits zero */
yading@10 243 "subl $4, %0 \n\t"
yading@10 244 "jnz 1b \n\t"
/* operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size */
yading@10 245 : "+g"(h), "+r"(pixels), "+r"(block)
yading@10 246 : "r"((x86_reg)line_size)
yading@10 247 : "%"REG_a, "memory"
yading@10 248 );
yading@10 249 }
yading@10 250
/**
 * Copy a 16-byte-wide block of h rows from pixels to block using MMX moves.
 * Each row is transferred as two 8-byte halves (offsets 0 and 8).
 *
 * @param block     destination; advanced by line_size bytes per row
 * @param pixels    source; advanced by line_size bytes per row
 * @param line_size byte distance between consecutive rows, applied to both
 *                  source and destination
 * @param h         number of rows. Each loop iteration copies 4 rows and the
 *                  loop ends only when the counter reaches exactly zero, so
 *                  h must be a positive multiple of 4.
 *
 * mm0, mm1, mm4 and mm5 are overwritten; no emms is issued here.
 */
yading@10 251 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
yading@10 252 ptrdiff_t line_size, int h)
yading@10 253 {
yading@10 254 __asm__ volatile (
/* REG_a = 2 * line_size: the pointer step after each pair of rows */
yading@10 255 "lea (%3, %3), %%"REG_a" \n\t"
yading@10 256 ".p2align 3 \n\t"
yading@10 257 "1: \n\t"
/* rows 0 and 1: low halves in mm0/mm1, high halves in mm4/mm5 */
yading@10 258 "movq (%1 ), %%mm0 \n\t"
yading@10 259 "movq 8(%1 ), %%mm4 \n\t"
yading@10 260 "movq (%1, %3), %%mm1 \n\t"
yading@10 261 "movq 8(%1, %3), %%mm5 \n\t"
yading@10 262 "movq %%mm0, (%2) \n\t"
yading@10 263 "movq %%mm4, 8(%2) \n\t"
yading@10 264 "movq %%mm1, (%2, %3) \n\t"
yading@10 265 "movq %%mm5, 8(%2, %3) \n\t"
yading@10 266 "add %%"REG_a", %1 \n\t"
yading@10 267 "add %%"REG_a", %2 \n\t"
/* rows 2 and 3 */
yading@10 268 "movq (%1 ), %%mm0 \n\t"
yading@10 269 "movq 8(%1 ), %%mm4 \n\t"
yading@10 270 "movq (%1, %3), %%mm1 \n\t"
yading@10 271 "movq 8(%1, %3), %%mm5 \n\t"
yading@10 272 "movq %%mm0, (%2) \n\t"
yading@10 273 "movq %%mm4, 8(%2) \n\t"
yading@10 274 "movq %%mm1, (%2, %3) \n\t"
yading@10 275 "movq %%mm5, 8(%2, %3) \n\t"
yading@10 276 "add %%"REG_a", %1 \n\t"
yading@10 277 "add %%"REG_a", %2 \n\t"
/* four rows done; loop until the row counter hits zero */
yading@10 278 "subl $4, %0 \n\t"
yading@10 279 "jnz 1b \n\t"
/* operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size */
yading@10 280 : "+g"(h), "+r"(pixels), "+r"(block)
yading@10 281 : "r"((x86_reg)line_size)
yading@10 282 : "%"REG_a, "memory"
yading@10 283 );
yading@10 284 }
yading@10 285 #endif /* HAVE_INLINE_ASM */
yading@10 286
/*
 * Prototypes of the 16-wide full-pel put/avg routines with the _sse2
 * suffix; only declared here and installed by hpeldsp_init_sse2().
 */
yading@10 287 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
yading@10 288 ptrdiff_t line_size, int h);
yading@10 289 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
yading@10 290 ptrdiff_t line_size, int h);
yading@10 291
/*
 * Fill the four entries [0..3] of one function table in the context
 * variable `c` (which must be in scope at the expansion site).
 *
 *   PFX  - table/function name prefix (e.g. put, avg, put_no_rnd)
 *   IDX  - optional first-level subscript such as [0]; may be left empty
 *          for a table that has a single dimension
 *   SIZE - block width pasted into the function name (e.g. 16, 8)
 *   CPU  - suffix pasted into the function name (e.g. mmx)
 *
 * Entry 0 gets PFX_pixels<SIZE>_<CPU>; entries 1, 2 and 3 get the _x2_,
 * _y2_ and _xy2_ variants of the same name.
 */
yading@10 292 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
yading@10 293 do { \
yading@10 294 c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
yading@10 295 c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
yading@10 296 c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
yading@10 297 c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
yading@10 298 } while (0)
yading@10 299
yading@10 300 static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int mm_flags)
yading@10 301 {
yading@10 302 #if HAVE_INLINE_ASM
yading@10 303 SET_HPEL_FUNCS(put, [0], 16, mmx);
yading@10 304 SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
yading@10 305 SET_HPEL_FUNCS(avg, [0], 16, mmx);
yading@10 306 SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx);
yading@10 307 SET_HPEL_FUNCS(put, [1], 8, mmx);
yading@10 308 SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
yading@10 309 SET_HPEL_FUNCS(avg, [1], 8, mmx);
yading@10 310 #endif /* HAVE_INLINE_ASM */
yading@10 311 }
yading@10 312
yading@10 313 static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
yading@10 314 {
yading@10 315 #if HAVE_YASM
yading@10 316 c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
yading@10 317 c->put_pixels_tab[0][2] = ff_put_pixels16_y2_mmxext;
yading@10 318
yading@10 319 c->avg_pixels_tab[0][0] = ff_avg_pixels16_mmxext;
yading@10 320 c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_mmxext;
yading@10 321 c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_mmxext;
yading@10 322
yading@10 323 c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
yading@10 324 c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
yading@10 325
yading@10 326 c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
yading@10 327 c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
yading@10 328 c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
yading@10 329
yading@10 330 if (!(flags & CODEC_FLAG_BITEXACT)) {
yading@10 331 c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_mmxext;
yading@10 332 c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_mmxext;
yading@10 333 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
yading@10 334 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
yading@10 335
yading@10 336 c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_mmxext;
yading@10 337 c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
yading@10 338 }
yading@10 339
yading@10 340 if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
yading@10 341 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
yading@10 342 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
yading@10 343 }
yading@10 344 #endif /* HAVE_YASM */
yading@10 345 }
yading@10 346
yading@10 347 static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
yading@10 348 {
yading@10 349 #if HAVE_YASM
yading@10 350 c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
yading@10 351 c->put_pixels_tab[0][2] = ff_put_pixels16_y2_3dnow;
yading@10 352
yading@10 353 c->avg_pixels_tab[0][0] = ff_avg_pixels16_3dnow;
yading@10 354 c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_3dnow;
yading@10 355 c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_3dnow;
yading@10 356
yading@10 357 c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
yading@10 358 c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
yading@10 359
yading@10 360 c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
yading@10 361 c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
yading@10 362 c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
yading@10 363
yading@10 364 if (!(flags & CODEC_FLAG_BITEXACT)){
yading@10 365 c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_3dnow;
yading@10 366 c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_3dnow;
yading@10 367 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
yading@10 368 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
yading@10 369
yading@10 370 c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_3dnow;
yading@10 371 c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
yading@10 372 }
yading@10 373
yading@10 374 if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
yading@10 375 c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
yading@10 376 c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
yading@10 377 }
yading@10 378 #endif /* HAVE_YASM */
yading@10 379 }
yading@10 380
yading@10 381 static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
yading@10 382 {
yading@10 383 #if HAVE_YASM
yading@10 384 if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
yading@10 385 // these functions are slower than mmx on AMD, but faster on Intel
yading@10 386 c->put_pixels_tab[0][0] = ff_put_pixels16_sse2;
yading@10 387 c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
yading@10 388 c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2;
yading@10 389 }
yading@10 390 #endif /* HAVE_YASM */
yading@10 391 }
yading@10 392
yading@10 393 void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
yading@10 394 {
yading@10 395 int mm_flags = av_get_cpu_flags();
yading@10 396
yading@10 397 if (HAVE_MMX && mm_flags & AV_CPU_FLAG_MMX)
yading@10 398 hpeldsp_init_mmx(c, flags, mm_flags);
yading@10 399
yading@10 400 if (mm_flags & AV_CPU_FLAG_MMXEXT)
yading@10 401 hpeldsp_init_mmxext(c, flags, mm_flags);
yading@10 402
yading@10 403 if (mm_flags & AV_CPU_FLAG_3DNOW)
yading@10 404 hpeldsp_init_3dnow(c, flags, mm_flags);
yading@10 405
yading@10 406 if (mm_flags & AV_CPU_FLAG_SSE2)
yading@10 407 hpeldsp_init_sse2(c, flags, mm_flags);
yading@10 408 }