61 int dstStride,
int src1Stride,
int h);
64 int src1Stride,
int h);
66 int dstStride,
int src1Stride,
int h);
68 int dstStride,
int src1Stride,
int h);
70 int dstStride,
int src1Stride,
int h);
72 int dstStride,
int src1Stride,
int h);
74 ptrdiff_t line_size,
int h);
76 static void ff_put_pixels16_mmxext(
uint8_t *block,
const uint8_t *pixels,
77 ptrdiff_t line_size,
int h)
84 int dstStride,
int srcStride,
int h);
86 int dstStride,
int srcStride,
int h);
88 int dstStride,
int srcStride,
91 int dstStride,
int srcStride,
int h);
93 int dstStride,
int srcStride,
int h);
95 int dstStride,
int srcStride,
98 int dstStride,
int srcStride);
100 int dstStride,
int srcStride);
102 int dstStride,
int srcStride);
104 int dstStride,
int srcStride);
106 int dstStride,
int srcStride);
108 int dstStride,
int srcStride);
109 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext 110 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext 116 #define JUMPALIGN() __asm__ volatile (".p2align 3"::) 117 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::) 119 #define MOVQ_BFE(regd) \ 121 "pcmpeqd %%"#regd", %%"#regd" \n\t" \ 122 "paddb %%"#regd", %%"#regd" \n\t" ::) 125 #define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone)) 126 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo)) 130 #define MOVQ_BONE(regd) \ 132 "pcmpeqd %%"#regd", %%"#regd" \n\t" \ 133 "psrlw $15, %%"#regd" \n\t" \ 134 "packuswb %%"#regd", %%"#regd" \n\t" ::) 136 #define MOVQ_WTWO(regd) \ 138 "pcmpeqd %%"#regd", %%"#regd" \n\t" \ 139 "psrlw $15, %%"#regd" \n\t" \ 140 "psllw $1, %%"#regd" \n\t"::) 147 #define PAVGB_MMX(rega, regb, regr, regfe) \ 148 "movq "#rega", "#regr" \n\t" \ 149 "por "#regb", "#regr" \n\t" \ 150 "pxor "#rega", "#regb" \n\t" \ 151 "pand "#regfe", "#regb" \n\t" \ 152 "psrlq $1, "#regb" \n\t" \ 153 "psubb "#regb", "#regr" \n\t" 156 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \ 157 "movq "#rega", "#regr" \n\t" \ 158 "movq "#regc", "#regp" \n\t" \ 159 "por "#regb", "#regr" \n\t" \ 160 "por "#regd", "#regp" \n\t" \ 161 "pxor "#rega", "#regb" \n\t" \ 162 "pxor "#regc", "#regd" \n\t" \ 163 "pand %%mm6, "#regb" \n\t" \ 164 "pand %%mm6, "#regd" \n\t" \ 165 "psrlq $1, "#regd" \n\t" \ 166 "psrlq $1, "#regb" \n\t" \ 167 "psubb "#regb", "#regr" \n\t" \ 168 "psubb "#regd", "#regp" \n\t" 173 #define DEF(x, y) x ## _ ## y ## _mmx 174 #define SET_RND MOVQ_WTWO 175 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) 176 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) 177 #define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e) 196 static void ff_avg_pixels16_mmxext(
uint8_t *block,
const uint8_t *pixels,
197 int line_size,
int h)
221 "movq (%3), %%mm0 \n\t" 222 "movq 8(%3), %%mm1 \n\t" 223 "movq 16(%3), %%mm2 \n\t" 224 "movq 24(%3), %%mm3 \n\t" 225 "movq 32(%3), %%mm4 \n\t" 226 "movq 40(%3), %%mm5 \n\t" 227 "movq 48(%3), %%mm6 \n\t" 228 "movq 56(%3), %%mm7 \n\t" 229 "packuswb %%mm1, %%mm0 \n\t" 230 "packuswb %%mm3, %%mm2 \n\t" 231 "packuswb %%mm5, %%mm4 \n\t" 232 "packuswb %%mm7, %%mm6 \n\t" 233 "movq %%mm0, (%0) \n\t" 234 "movq %%mm2, (%0, %1) \n\t" 235 "movq %%mm4, (%0, %1, 2) \n\t" 236 "movq %%mm6, (%0, %2) \n\t" 237 ::
"r"(pix),
"r"((
x86_reg)line_size),
"r"((
x86_reg)line_size * 3),
240 pix += line_size * 4;
247 "movq (%3), %%mm0 \n\t" 248 "movq 8(%3), %%mm1 \n\t" 249 "movq 16(%3), %%mm2 \n\t" 250 "movq 24(%3), %%mm3 \n\t" 251 "movq 32(%3), %%mm4 \n\t" 252 "movq 40(%3), %%mm5 \n\t" 253 "movq 48(%3), %%mm6 \n\t" 254 "movq 56(%3), %%mm7 \n\t" 255 "packuswb %%mm1, %%mm0 \n\t" 256 "packuswb %%mm3, %%mm2 \n\t" 257 "packuswb %%mm5, %%mm4 \n\t" 258 "packuswb %%mm7, %%mm6 \n\t" 259 "movq %%mm0, (%0) \n\t" 260 "movq %%mm2, (%0, %1) \n\t" 261 "movq %%mm4, (%0, %1, 2) \n\t" 262 "movq %%mm6, (%0, %2) \n\t" 263 ::
"r"(pix),
"r"((
x86_reg)line_size),
"r"((
x86_reg)line_size * 3),
"r"(p)
267 #define put_signed_pixels_clamped_mmx_half(off) \ 268 "movq "#off"(%2), %%mm1 \n\t" \ 269 "movq 16 + "#off"(%2), %%mm2 \n\t" \ 270 "movq 32 + "#off"(%2), %%mm3 \n\t" \ 271 "movq 48 + "#off"(%2), %%mm4 \n\t" \ 272 "packsswb 8 + "#off"(%2), %%mm1 \n\t" \ 273 "packsswb 24 + "#off"(%2), %%mm2 \n\t" \ 274 "packsswb 40 + "#off"(%2), %%mm3 \n\t" \ 275 "packsswb 56 + "#off"(%2), %%mm4 \n\t" \ 276 "paddb %%mm0, %%mm1 \n\t" \ 277 "paddb %%mm0, %%mm2 \n\t" \ 278 "paddb %%mm0, %%mm3 \n\t" \ 279 "paddb %%mm0, %%mm4 \n\t" \ 280 "movq %%mm1, (%0) \n\t" \ 281 "movq %%mm2, (%0, %3) \n\t" \ 282 "movq %%mm3, (%0, %3, 2) \n\t" \ 283 "movq %%mm4, (%0, %1) \n\t" 292 "movq "MANGLE(ff_pb_80)
", %%mm0 \n\t" 293 "lea (%3, %3, 2), %1 \n\t" 294 put_signed_pixels_clamped_mmx_half(0)
295 "lea (%0, %3, 4), %0 \n\t" 296 put_signed_pixels_clamped_mmx_half(64)
297 :
"+&r"(pixels),
"=&r"(line_skip3)
298 :
"r"(block),
"r"(line_skip)
316 "movq (%2), %%mm0 \n\t" 317 "movq 8(%2), %%mm1 \n\t" 318 "movq 16(%2), %%mm2 \n\t" 319 "movq 24(%2), %%mm3 \n\t" 320 "movq %0, %%mm4 \n\t" 321 "movq %1, %%mm6 \n\t" 322 "movq %%mm4, %%mm5 \n\t" 323 "punpcklbw %%mm7, %%mm4 \n\t" 324 "punpckhbw %%mm7, %%mm5 \n\t" 325 "paddsw %%mm4, %%mm0 \n\t" 326 "paddsw %%mm5, %%mm1 \n\t" 327 "movq %%mm6, %%mm5 \n\t" 328 "punpcklbw %%mm7, %%mm6 \n\t" 329 "punpckhbw %%mm7, %%mm5 \n\t" 330 "paddsw %%mm6, %%mm2 \n\t" 331 "paddsw %%mm5, %%mm3 \n\t" 332 "packuswb %%mm1, %%mm0 \n\t" 333 "packuswb %%mm3, %%mm2 \n\t" 334 "movq %%mm0, %0 \n\t" 335 "movq %%mm2, %1 \n\t" 336 :
"+m"(*pix),
"+m"(*(pix + line_size))
339 pix += line_size * 2;
344 static void put_pixels8_mmx(
uint8_t *block,
const uint8_t *pixels,
345 ptrdiff_t line_size,
int h)
348 "lea (%3, %3), %%"REG_a
" \n\t" 351 "movq (%1 ), %%mm0 \n\t" 352 "movq (%1, %3), %%mm1 \n\t" 353 "movq %%mm0, (%2) \n\t" 354 "movq %%mm1, (%2, %3) \n\t" 355 "add %%"REG_a
", %1 \n\t" 356 "add %%"REG_a
", %2 \n\t" 357 "movq (%1 ), %%mm0 \n\t" 358 "movq (%1, %3), %%mm1 \n\t" 359 "movq %%mm0, (%2) \n\t" 360 "movq %%mm1, (%2, %3) \n\t" 361 "add %%"REG_a
", %1 \n\t" 362 "add %%"REG_a
", %2 \n\t" 365 :
"+g"(h),
"+r"(pixels),
"+r"(block)
371 static void put_pixels16_mmx(
uint8_t *block,
const uint8_t *pixels,
372 ptrdiff_t line_size,
int h)
375 "lea (%3, %3), %%"REG_a
" \n\t" 378 "movq (%1 ), %%mm0 \n\t" 379 "movq 8(%1 ), %%mm4 \n\t" 380 "movq (%1, %3), %%mm1 \n\t" 381 "movq 8(%1, %3), %%mm5 \n\t" 382 "movq %%mm0, (%2) \n\t" 383 "movq %%mm4, 8(%2) \n\t" 384 "movq %%mm1, (%2, %3) \n\t" 385 "movq %%mm5, 8(%2, %3) \n\t" 386 "add %%"REG_a
", %1 \n\t" 387 "add %%"REG_a
", %2 \n\t" 388 "movq (%1 ), %%mm0 \n\t" 389 "movq 8(%1 ), %%mm4 \n\t" 390 "movq (%1, %3), %%mm1 \n\t" 391 "movq 8(%1, %3), %%mm5 \n\t" 392 "movq %%mm0, (%2) \n\t" 393 "movq %%mm4, 8(%2) \n\t" 394 "movq %%mm1, (%2, %3) \n\t" 395 "movq %%mm5, 8(%2, %3) \n\t" 396 "add %%"REG_a
", %1 \n\t" 397 "add %%"REG_a
", %2 \n\t" 400 :
"+g"(h),
"+r"(pixels),
"+r"(block)
406 #define CLEAR_BLOCKS(name, n) \ 407 static void name(int16_t *blocks) \ 410 "pxor %%mm7, %%mm7 \n\t" \ 411 "mov %1, %%"REG_a" \n\t" \ 413 "movq %%mm7, (%0, %%"REG_a") \n\t" \ 414 "movq %%mm7, 8(%0, %%"REG_a") \n\t" \ 415 "movq %%mm7, 16(%0, %%"REG_a") \n\t" \ 416 "movq %%mm7, 24(%0, %%"REG_a") \n\t" \ 417 "add $32, %%"REG_a" \n\t" \ 419 :: "r"(((uint8_t *)blocks) + 128 * n), \ 424 CLEAR_BLOCKS(clear_blocks_mmx, 6)
425 CLEAR_BLOCKS(clear_block_mmx, 1)
427 static void clear_block_sse(int16_t *block)
430 "xorps %%xmm0, %%xmm0 \n" 431 "movaps %%xmm0, (%0) \n" 432 "movaps %%xmm0, 16(%0) \n" 433 "movaps %%xmm0, 32(%0) \n" 434 "movaps %%xmm0, 48(%0) \n" 435 "movaps %%xmm0, 64(%0) \n" 436 "movaps %%xmm0, 80(%0) \n" 437 "movaps %%xmm0, 96(%0) \n" 438 "movaps %%xmm0, 112(%0) \n" 444 static void clear_blocks_sse(int16_t *blocks)
447 "xorps %%xmm0, %%xmm0 \n" 448 "mov %1, %%"REG_a
" \n" 450 "movaps %%xmm0, (%0, %%"REG_a
") \n" 451 "movaps %%xmm0, 16(%0, %%"REG_a
") \n" 452 "movaps %%xmm0, 32(%0, %%"REG_a
") \n" 453 "movaps %%xmm0, 48(%0, %%"REG_a
") \n" 454 "movaps %%xmm0, 64(%0, %%"REG_a
") \n" 455 "movaps %%xmm0, 80(%0, %%"REG_a
") \n" 456 "movaps %%xmm0, 96(%0, %%"REG_a
") \n" 457 "movaps %%xmm0, 112(%0, %%"REG_a
") \n" 458 "add $128, %%"REG_a
" \n" 460 ::
"r"(((
uint8_t *)blocks) + 128 * 6),
472 "movq (%1, %0), %%mm0 \n\t" 473 "movq (%2, %0), %%mm1 \n\t" 474 "paddb %%mm0, %%mm1 \n\t" 475 "movq %%mm1, (%2, %0) \n\t" 476 "movq 8(%1, %0), %%mm0 \n\t" 477 "movq 8(%2, %0), %%mm1 \n\t" 478 "paddb %%mm0, %%mm1 \n\t" 479 "movq %%mm1, 8(%2, %0) \n\t" 494 int *left,
int *left_top)
498 int l = *left & 0xff;
499 int tl = *left_top & 0xff;
504 "movzbl (%3, %4), %2 \n" 517 "add (%6, %4), %b0 \n" 518 "mov %b0, (%5, %4) \n" 521 :
"+&q"(l),
"+&q"(tl),
"=&r"(
t),
"=&q"(x),
"+&r"(w2)
532 int w,
int h,
int sides)
543 "movd (%0), %%mm0 \n\t" 544 "punpcklbw %%mm0, %%mm0 \n\t" 545 "punpcklwd %%mm0, %%mm0 \n\t" 546 "punpckldq %%mm0, %%mm0 \n\t" 547 "movq %%mm0, -8(%0) \n\t" 548 "movq -8(%0, %2), %%mm1 \n\t" 549 "punpckhbw %%mm1, %%mm1 \n\t" 550 "punpckhwd %%mm1, %%mm1 \n\t" 551 "punpckhdq %%mm1, %%mm1 \n\t" 552 "movq %%mm1, (%0, %2) \n\t" 562 "movd (%0), %%mm0 \n\t" 563 "punpcklbw %%mm0, %%mm0 \n\t" 564 "punpcklwd %%mm0, %%mm0 \n\t" 565 "punpckldq %%mm0, %%mm0 \n\t" 566 "movq %%mm0, -8(%0) \n\t" 567 "movq %%mm0, -16(%0) \n\t" 568 "movq -8(%0, %2), %%mm1 \n\t" 569 "punpckhbw %%mm1, %%mm1 \n\t" 570 "punpckhwd %%mm1, %%mm1 \n\t" 571 "punpckhdq %%mm1, %%mm1 \n\t" 572 "movq %%mm1, (%0, %2) \n\t" 573 "movq %%mm1, 8(%0, %2) \n\t" 584 "movd (%0), %%mm0 \n\t" 585 "punpcklbw %%mm0, %%mm0 \n\t" 586 "punpcklwd %%mm0, %%mm0 \n\t" 587 "movd %%mm0, -4(%0) \n\t" 588 "movd -4(%0, %2), %%mm1 \n\t" 589 "punpcklbw %%mm1, %%mm1 \n\t" 590 "punpckhwd %%mm1, %%mm1 \n\t" 591 "punpckhdq %%mm1, %%mm1 \n\t" 592 "movd %%mm1, (%0, %2) \n\t" 603 for (i = 0; i < h; i += 4) {
607 "movq (%1, %0), %%mm0 \n\t" 608 "movq %%mm0, (%0) \n\t" 609 "movq %%mm0, (%0, %2) \n\t" 610 "movq %%mm0, (%0, %2, 2) \n\t" 611 "movq %%mm0, (%0, %3) \n\t" 623 for (i = 0; i < h; i += 4) {
624 ptr = last_line + (i + 1) *
wrap -
w;
627 "movq (%1, %0), %%mm0 \n\t" 628 "movq %%mm0, (%0) \n\t" 629 "movq %%mm0, (%0, %2) \n\t" 630 "movq %%mm0, (%0, %2, 2) \n\t" 631 "movq %%mm0, (%0, %3) \n\t" 647 #define QPEL_OP(OPNAME, ROUNDER, RND, MMX) \ 648 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \ 651 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \ 654 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \ 658 uint8_t * const half = (uint8_t*)temp; \ 659 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ 661 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ 662 stride, stride, 8); \ 665 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \ 668 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \ 672 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \ 676 uint8_t * const half = (uint8_t*)temp; \ 677 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ 679 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \ 683 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \ 687 uint8_t * const half = (uint8_t*)temp; \ 688 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 690 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ 691 stride, stride, 8); \ 694 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \ 697 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \ 701 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \ 705 uint8_t * const half = (uint8_t*)temp; \ 706 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 708 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\ 712 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \ 715 uint64_t half[8 + 9]; \ 716 uint8_t * const halfH = ((uint8_t*)half) + 64; \ 717 uint8_t * const halfHV = ((uint8_t*)half); \ 718 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 720 ff_put ## RND ## pixels8_l2_ ## 
MMX(halfH, src, halfH, 8, \ 722 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 723 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 727 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \ 730 uint64_t half[8 + 9]; \ 731 uint8_t * const halfH = ((uint8_t*)half) + 64; \ 732 uint8_t * const halfHV = ((uint8_t*)half); \ 733 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 735 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 737 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 738 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 742 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \ 745 uint64_t half[8 + 9]; \ 746 uint8_t * const halfH = ((uint8_t*)half) + 64; \ 747 uint8_t * const halfHV = ((uint8_t*)half); \ 748 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 750 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ 752 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 753 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 757 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \ 760 uint64_t half[8 + 9]; \ 761 uint8_t * const halfH = ((uint8_t*)half) + 64; \ 762 uint8_t * const halfHV = ((uint8_t*)half); \ 763 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 765 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 767 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 768 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 772 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \ 775 uint64_t half[8 + 9]; \ 776 uint8_t * const halfH = ((uint8_t*)half) + 64; \ 777 uint8_t * const halfHV = ((uint8_t*)half); \ 778 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 780 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 781 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, 
halfHV, \ 785 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \ 788 uint64_t half[8 + 9]; \ 789 uint8_t * const halfH = ((uint8_t*)half) + 64; \ 790 uint8_t * const halfHV = ((uint8_t*)half); \ 791 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 793 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 794 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 798 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \ 801 uint64_t half[8 + 9]; \ 802 uint8_t * const halfH = ((uint8_t*)half); \ 803 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 805 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \ 807 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 811 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \ 814 uint64_t half[8 + 9]; \ 815 uint8_t * const halfH = ((uint8_t*)half); \ 816 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 818 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 820 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 824 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ 828 uint8_t * const halfH = ((uint8_t*)half); \ 829 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 831 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 835 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \ 838 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \ 841 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \ 845 uint8_t * const half = (uint8_t*)temp; \ 846 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ 848 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ 852 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \ 855 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \ 856 stride, stride, 16);\ 859 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, 
uint8_t *src, \ 863 uint8_t * const half = (uint8_t*)temp; \ 864 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ 866 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \ 867 stride, stride, 16); \ 870 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \ 874 uint8_t * const half = (uint8_t*)temp; \ 875 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ 877 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ 881 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \ 884 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \ 888 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \ 892 uint8_t * const half = (uint8_t*)temp; \ 893 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ 895 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \ 896 stride, stride, 16); \ 899 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \ 902 uint64_t half[16 * 2 + 17 * 2]; \ 903 uint8_t * const halfH = ((uint8_t*)half) + 256; \ 904 uint8_t * const halfHV = ((uint8_t*)half); \ 905 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 907 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 909 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 911 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 915 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \ 918 uint64_t half[16 * 2 + 17 * 2]; \ 919 uint8_t * const halfH = ((uint8_t*)half) + 256; \ 920 uint8_t * const halfHV = ((uint8_t*)half); \ 921 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 923 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 925 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 927 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 931 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \ 934 uint64_t half[16 * 2 + 17 * 2]; \ 935 uint8_t 
* const halfH = ((uint8_t*)half) + 256; \ 936 uint8_t * const halfHV = ((uint8_t*)half); \ 937 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 939 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 941 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 943 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 947 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \ 950 uint64_t half[16 * 2 + 17 * 2]; \ 951 uint8_t * const halfH = ((uint8_t*)half) + 256; \ 952 uint8_t * const halfHV = ((uint8_t*)half); \ 953 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 955 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 957 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 959 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 963 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \ 966 uint64_t half[16 * 2 + 17 * 2]; \ 967 uint8_t * const halfH = ((uint8_t*)half) + 256; \ 968 uint8_t * const halfHV = ((uint8_t*)half); \ 969 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 971 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 973 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 977 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \ 980 uint64_t half[16 * 2 + 17 * 2]; \ 981 uint8_t * const halfH = ((uint8_t*)half) + 256; \ 982 uint8_t * const halfHV = ((uint8_t*)half); \ 983 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 985 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 987 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 991 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \ 994 uint64_t half[17 * 2]; \ 995 uint8_t * const halfH = ((uint8_t*)half); \ 996 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 998 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 1000 ff_ 
## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 1004 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \ 1007 uint64_t half[17 * 2]; \ 1008 uint8_t * const halfH = ((uint8_t*)half); \ 1009 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 1011 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 1013 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 1017 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ 1020 uint64_t half[17 * 2]; \ 1021 uint8_t * const halfH = ((uint8_t*)half); \ 1022 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 1024 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 1030 QPEL_OP(put_no_rnd_,
ff_pw_15, _no_rnd_, mmxext)
1037 put_pixels8_xy2_mmx(
dst,
src, stride, 8);
1041 put_pixels16_xy2_mmx(
dst,
src, stride, 16);
1045 avg_pixels8_xy2_mmx(
dst,
src, stride, 8);
1049 avg_pixels16_xy2_mmx(
dst,
src, stride, 16);
1053 ptrdiff_t linesize,
int block_w,
int block_h,
1054 int src_x,
int src_y,
int w,
int h);
1057 int stride,
int h,
int ox,
int oy,
1058 int dxx,
int dxy,
int dyx,
int dyy,
1060 emulated_edge_mc_func *emu_edge_fn)
1063 const int ix = ox >> (16 +
shift);
1064 const int iy = oy >> (16 +
shift);
1065 const int oxs = ox >> 4;
1066 const int oys = oy >> 4;
1067 const int dxxs = dxx >> 4;
1068 const int dxys = dxy >> 4;
1069 const int dyxs = dyx >> 4;
1070 const int dyys = dyy >> 4;
1071 const uint16_t r4[4] = {
r,
r,
r, r };
1072 const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
1073 const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
1075 #define MAX_STRIDE 4096U 1077 uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
1080 const int dxw = (dxx - (1 << (16 +
shift))) * (w - 1);
1081 const int dyh = (dyy - (1 << (16 +
shift))) * (h - 1);
1082 const int dxh = dxy * (h - 1);
1083 const int dyw = dyx * (w - 1);
1084 int need_emu = (unsigned)ix >=
width - w ||
1085 (
unsigned)iy >=
height - h;
1088 ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
1089 (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 +
shift)
1091 || (dxx | dxy | dyx | dyy) & 15
1092 || (need_emu && (h > MAX_H ||
stride > MAX_STRIDE))) {
1094 ff_gmc_c(
dst,
src,
stride, h, ox, oy, dxx, dxy, dyx, dyy,
1101 emu_edge_fn(edge_buf,
src, stride, w + 1, h + 1, ix, iy,
width,
height);
1106 "movd %0, %%mm6 \n\t" 1107 "pxor %%mm7, %%mm7 \n\t" 1108 "punpcklwd %%mm6, %%mm6 \n\t" 1109 "punpcklwd %%mm6, %%mm6 \n\t" 1113 for (x = 0; x <
w; x += 4) {
1114 uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0),
1115 oxs - dxys + dxxs * (x + 1),
1116 oxs - dxys + dxxs * (x + 2),
1117 oxs - dxys + dxxs * (x + 3) };
1118 uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0),
1119 oys - dyys + dyxs * (x + 1),
1120 oys - dyys + dyxs * (x + 2),
1121 oys - dyys + dyxs * (x + 3) };
1123 for (y = 0; y < h; y++) {
1125 "movq %0, %%mm4 \n\t" 1126 "movq %1, %%mm5 \n\t" 1127 "paddw %2, %%mm4 \n\t" 1128 "paddw %3, %%mm5 \n\t" 1129 "movq %%mm4, %0 \n\t" 1130 "movq %%mm5, %1 \n\t" 1131 "psrlw $12, %%mm4 \n\t" 1132 "psrlw $12, %%mm5 \n\t" 1133 :
"+m"(*dx4),
"+m"(*dy4)
1134 :
"m"(*dxy4),
"m"(*dyy4)
1138 "movq %%mm6, %%mm2 \n\t" 1139 "movq %%mm6, %%mm1 \n\t" 1140 "psubw %%mm4, %%mm2 \n\t" 1141 "psubw %%mm5, %%mm1 \n\t" 1142 "movq %%mm2, %%mm0 \n\t" 1143 "movq %%mm4, %%mm3 \n\t" 1144 "pmullw %%mm1, %%mm0 \n\t" 1145 "pmullw %%mm5, %%mm3 \n\t" 1146 "pmullw %%mm5, %%mm2 \n\t" 1147 "pmullw %%mm4, %%mm1 \n\t" 1149 "movd %4, %%mm5 \n\t" 1150 "movd %3, %%mm4 \n\t" 1151 "punpcklbw %%mm7, %%mm5 \n\t" 1152 "punpcklbw %%mm7, %%mm4 \n\t" 1153 "pmullw %%mm5, %%mm3 \n\t" 1154 "pmullw %%mm4, %%mm2 \n\t" 1156 "movd %2, %%mm5 \n\t" 1157 "movd %1, %%mm4 \n\t" 1158 "punpcklbw %%mm7, %%mm5 \n\t" 1159 "punpcklbw %%mm7, %%mm4 \n\t" 1160 "pmullw %%mm5, %%mm1 \n\t" 1161 "pmullw %%mm4, %%mm0 \n\t" 1162 "paddw %5, %%mm1 \n\t" 1163 "paddw %%mm3, %%mm2 \n\t" 1164 "paddw %%mm1, %%mm0 \n\t" 1165 "paddw %%mm2, %%mm0 \n\t" 1167 "psrlw %6, %%mm0 \n\t" 1168 "packuswb %%mm0, %%mm0 \n\t" 1169 "movd %%mm0, %0 \n\t" 1172 :
"m"(
src[0]),
"m"(
src[1]),
1173 "m"(
src[stride]),
"m"(
src[stride + 1]),
1187 int stride,
int h,
int ox,
int oy,
1188 int dxx,
int dxy,
int dyx,
int dyy,
1191 gmc(
dst,
src,
stride, h, ox, oy, dxx, dxy, dyx, dyy,
shift,
r,
1192 width, height, &ff_emulated_edge_mc_8);
1196 int stride,
int h,
int ox,
int oy,
1197 int dxx,
int dxy,
int dyx,
int dyy,
1200 gmc(
dst,
src,
stride, h, ox, oy, dxx, dxy, dyx, dyy,
shift,
r,
1201 width, height, &ff_emulated_edge_mc_8);
1205 int stride,
int h,
int ox,
int oy,
1206 int dxx,
int dxy,
int dyx,
int dyy,
1209 gmc(
dst,
src,
stride, h, ox, oy, dxx, dxy, dyx, dyy,
shift,
r,
1210 width, height, &ff_emulated_edge_mc_8);
1218 put_pixels8_mmx(
dst,
src, stride, 8);
1223 avg_pixels8_mmx(
dst,
src, stride, 8);
1228 put_pixels16_mmx(
dst,
src, stride, 16);
1233 avg_pixels16_mmx(
dst,
src, stride, 16);
1238 ptrdiff_t
stride,
int rnd)
1243 #if CONFIG_DIRAC_DECODER 1244 #define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\ 1245 void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 1248 ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\ 1250 OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\ 1252 void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 1255 ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\ 1257 OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ 1259 void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 1262 ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\ 1264 OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ 1265 OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ 1270 DIRAC_PIXOP(
put,
put, mmx)
1271 DIRAC_PIXOP(
avg,
avg, mmx)
1275 DIRAC_PIXOP(
avg, ff_avg, mmxext)
1312 static void vector_clipf_sse(
float *
dst,
const float *
src,
1317 "movss %3, %%xmm4 \n\t" 1318 "movss %4, %%xmm5 \n\t" 1319 "shufps $0, %%xmm4, %%xmm4 \n\t" 1320 "shufps $0, %%xmm5, %%xmm5 \n\t" 1322 "movaps (%2, %0), %%xmm0 \n\t" 1323 "movaps 16(%2, %0), %%xmm1 \n\t" 1324 "movaps 32(%2, %0), %%xmm2 \n\t" 1325 "movaps 48(%2, %0), %%xmm3 \n\t" 1326 "maxps %%xmm4, %%xmm0 \n\t" 1327 "maxps %%xmm4, %%xmm1 \n\t" 1328 "maxps %%xmm4, %%xmm2 \n\t" 1329 "maxps %%xmm4, %%xmm3 \n\t" 1330 "minps %%xmm5, %%xmm0 \n\t" 1331 "minps %%xmm5, %%xmm1 \n\t" 1332 "minps %%xmm5, %%xmm2 \n\t" 1333 "minps %%xmm5, %%xmm3 \n\t" 1334 "movaps %%xmm0, (%1, %0) \n\t" 1335 "movaps %%xmm1, 16(%1, %0) \n\t" 1336 "movaps %%xmm2, 32(%1, %0) \n\t" 1337 "movaps %%xmm3, 48(%1, %0) \n\t" 1357 int order,
int mul);
1360 int order,
int mul);
1363 int order,
int mul);
1366 const int16_t *
window,
unsigned int len);
1368 const int16_t *
window,
unsigned int len);
1370 const int16_t *
window,
unsigned int len);
1372 const int16_t *
window,
unsigned int len);
1374 const int16_t *
window,
unsigned int len);
1376 const int16_t *
window,
unsigned int len);
1383 int *left,
int *left_top);
1398 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ 1400 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ 1401 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ 1402 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ 1403 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ 1404 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ 1405 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ 1406 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ 1407 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ 1408 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ 1409 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ 1410 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ 1411 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ 1412 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ 1413 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ 1414 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ 1415 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ 1428 if (!high_bit_depth) {
1434 #if CONFIG_VIDEODSP && (ARCH_X86_32 || !HAVE_YASM) 1466 #if HAVE_MMXEXT_EXTERNAL 1488 if (!high_bit_depth) {
1500 #if HAVE_INLINE_ASM && CONFIG_VIDEODSP 1511 #if HAVE_SSE2_INLINE 1520 #if HAVE_SSE2_EXTERNAL 1540 #if HAVE_SSSE3_EXTERNAL 1558 #if HAVE_SSE4_EXTERNAL 1567 #if HAVE_7REGS && HAVE_INLINE_ASM
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#define CONFIG_MPEG_XVMC_DECODER
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)
int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
static int shift(int a, int b)
void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
if max(w)>1 w=0.9 *w/max(w)
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_idct_xvid_sse2(short *block)
#define AV_CPU_FLAG_SSE
SSE functions.
void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, int mm_flags)
void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w)
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
#define AV_CPU_FLAG_CMOV
supports cmov instruction
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
void(* clear_block)(int16_t *block)
output residual component w
Macro definitions for various function/variable attributes.
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
#define AV_CPU_FLAG_ATOM
Atom processor, some SSSE3 instructions are slower.
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, int order)
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
int lowres
low resolution decoding, 1-> 1/2 size, 2->1/4 size
#define AV_CPU_FLAG_SSE2SLOW
SSE2 supported, but usually not faster.
void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, int16_t *block)
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
#define AV_CPU_FLAG_SSE42
Nehalem SSE4.2 functions.
void(* vector_clipf)(float *dst, const float *src, float min, float max, int len)
#define FF_SSE2_IDCT_PERM
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions.
void(* add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
void(* h263_h_loop_filter)(uint8_t *src, int stride, int qscale)
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, int mm_flags)
These buffered frames must be flushed immediately if a new input produces new output; the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or to the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: this method is called when a frame is wanted on an output. For an input,
void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
void(* clear_blocks)(int16_t *blocks)
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order)
overlapping window (a triangular window is used to avoid too much overlap) ovidx
void ff_simple_idct_mmx(int16_t *block)
void(* apply_window_int16)(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
Apply symmetric window in 16-bit fixed-point.
void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block)
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
int32_t(* scalarproduct_and_madd_int16)(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul)
Calculate scalar product of v1 and v2, and v1[i] += v3[i] * mul.
void(* draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
int idct_algo
IDCT algorithm, see FF_IDCT_* below.
void(* put_signed_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
void(* add_bytes)(uint8_t *dst, uint8_t *src, int w)
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
void(* put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, int mm_flags)
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
void ff_h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale)
#define diff(a, as, b, bs)
int xvmc_acceleration
XVideo Motion Acceleration.
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
#define AV_CPU_FLAG_3DNOW
AMD 3DNOW.
void(* add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
void(* vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
Clip each element in an array of int32_t to a given minimum and maximum value.
int idct_permutation_type
void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w)
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
main external API structure.
#define AV_CPU_FLAG_MMX
standard MMX
#define FF_SIMPLE_IDCT_PERM
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
#define CONFIG_H263_DECODER
BYTE int const BYTE int int int height
synthesis window for stochastic i
void(* bswap_buf)(uint32_t *dst, const uint32_t *src, int w)
void(* gmc)(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
global motion compensation.
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, int16_t *block)
void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, int16_t *block)
int32_t(* scalarproduct_int16)(const int16_t *v1, const int16_t *v2, int len)
Calculate scalar product of two vectors.
void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, int16_t *block)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void(* idct)(int16_t *block)
header for Xvid IDCT functions
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
static const int shift2[6]
These buffered frames must be flushed immediately if a new input produces new output. (Example: a frame-rate-doubling filter's filter_frame must (1) flush the second copy of the previous frame, if it is still there, (2) push the first copy of the incoming frame, and (3) keep the second copy for later.) If the input frame is not enough to produce output,
#define CONFIG_H263_ENCODER
int(* add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left)
Core video DSP helper functions.
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> clip to unsigned 8 bit -> dest.
void(* h263_v_loop_filter)(uint8_t *src, int stride, int qscale)
int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left)
else dst[i][x+y *dst_stride[i]]
void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left)
#define FF_IDCT_SIMPLEMMX
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
void ff_idct_xvid_mmx(short *block)
int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15)=0
void ff_idct_xvid_mmxext(short *block)
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)