/* Sum of squared differences between two 8-pixel-wide blocks of height h (MMX). */
static int sse8_mmx(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int tmp;

    __asm__ volatile (
        "movq (%0,%3),%%mm3\n"
        "movq (%1,%3),%%mm4\n"
        "psubusb %%mm2,%%mm1\n"
        "psubusb %%mm4,%%mm3\n"
        "psubusb %%mm5,%%mm2\n"
        "psubusb %%mm6,%%mm4\n"
        "punpckhbw %%mm0,%%mm2\n"
        "punpckhbw %%mm0,%%mm4\n"
        "punpcklbw %%mm0,%%mm1\n"
        "punpcklbw %%mm0,%%mm3\n"
        "pmaddwd %%mm2,%%mm2\n"
        "pmaddwd %%mm4,%%mm4\n"
        "pmaddwd %%mm1,%%mm1\n"
        "pmaddwd %%mm3,%%mm3\n"
        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
        : "r" ((x86_reg) line_size), "m" (h));

    return tmp;
}
/* Sum of squared differences between two 16-pixel-wide blocks of height h (MMX). */
static int sse16_mmx(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int tmp;

    __asm__ volatile (
        "psubusb %%mm2,%%mm1\n"
        "psubusb %%mm4,%%mm3\n"
        "psubusb %%mm5,%%mm2\n"
        "psubusb %%mm6,%%mm4\n"
        "punpckhbw %%mm0,%%mm2\n"
        "punpckhbw %%mm0,%%mm4\n"
        "punpcklbw %%mm0,%%mm1\n"
        "punpcklbw %%mm0,%%mm3\n"
        "pmaddwd %%mm2,%%mm2\n"
        "pmaddwd %%mm4,%%mm4\n"
        "pmaddwd %%mm1,%%mm1\n"
        "pmaddwd %%mm3,%%mm3\n"
        "paddd %%mm2,%%mm1\n"
        "paddd %%mm4,%%mm3\n"
        "paddd %%mm1,%%mm7\n"
        "paddd %%mm3,%%mm7\n"
        "paddd %%mm7,%%mm1\n"
        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
        : "r" ((x86_reg) line_size), "m" (h));

    return tmp;
}
/* High-frequency noise measure of an 8-pixel-wide block (MMX), used by the NSSE comparators. */
static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h)
{
    int tmp;

    __asm__ volatile (
        "movq %%mm0, %%mm1\n"
        "movq %%mm0, %%mm2\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm0\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm2\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm0\n"
        "psubw %%mm3, %%mm2\n"

        "movq %%mm4, %%mm1\n"
        "movq %%mm4, %%mm5\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm4\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm5\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm4\n"
        "psubw %%mm3, %%mm5\n"
        "psubw %%mm4, %%mm0\n"
        "psubw %%mm5, %%mm2\n"
        "pxor %%mm3, %%mm3\n"
        "pxor %%mm1, %%mm1\n"
        "pcmpgtw %%mm0, %%mm3\n\t"
        "pcmpgtw %%mm2, %%mm1\n\t"
        "pxor %%mm3, %%mm0\n"
        "pxor %%mm1, %%mm2\n"
        "psubw %%mm3, %%mm0\n"
        "psubw %%mm1, %%mm2\n"
        "paddw %%mm0, %%mm2\n"
        "paddw %%mm2, %%mm6\n"

        "movq %%mm0, %%mm1\n"
        "movq %%mm0, %%mm2\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm0\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm2\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm0\n"
        "psubw %%mm3, %%mm2\n"
        "psubw %%mm0, %%mm4\n"
        "psubw %%mm2, %%mm5\n"
        "pxor %%mm3, %%mm3\n"
        "pxor %%mm1, %%mm1\n"
        "pcmpgtw %%mm4, %%mm3\n\t"
        "pcmpgtw %%mm5, %%mm1\n\t"
        "pxor %%mm3, %%mm4\n"
        "pxor %%mm1, %%mm5\n"
        "psubw %%mm3, %%mm4\n"
        "psubw %%mm1, %%mm5\n"
        "paddw %%mm4, %%mm5\n"
        "paddw %%mm5, %%mm6\n"

        "movq %%mm4, %%mm1\n"
        "movq %%mm4, %%mm5\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm4\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm5\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm4\n"
        "psubw %%mm3, %%mm5\n"
        "psubw %%mm4, %%mm0\n"
        "psubw %%mm5, %%mm2\n"
        "pxor %%mm3, %%mm3\n"
        "pxor %%mm1, %%mm1\n"
        "pcmpgtw %%mm0, %%mm3\n\t"
        "pcmpgtw %%mm2, %%mm1\n\t"
        "pxor %%mm3, %%mm0\n"
        "pxor %%mm1, %%mm2\n"
        "psubw %%mm3, %%mm0\n"
        "psubw %%mm1, %%mm2\n"
        "paddw %%mm0, %%mm2\n"
        "paddw %%mm2, %%mm6\n"

        "movq %%mm6, %%mm0\n"
        "punpcklwd %%mm7,%%mm0\n"
        "punpckhwd %%mm7,%%mm6\n"
        "paddd %%mm0, %%mm6\n"

        "paddd %%mm6,%%mm0\n"
        : "+r" (pix1), "=r" (tmp)
        : "r" ((x86_reg) line_size), "g" (h - 2));

    return tmp;
}
/* High-frequency noise measure of a 16-pixel-wide block (MMX): left half here, right half via hf_noise8_mmx(). */
static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h)
{
    int tmp;

    __asm__ volatile (
        "movq %%mm0, %%mm2\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm0\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm2\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm0\n"
        "psubw %%mm3, %%mm2\n"

        "movq %%mm4, %%mm5\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm4\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm5\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm4\n"
        "psubw %%mm3, %%mm5\n"
        "psubw %%mm4, %%mm0\n"
        "psubw %%mm5, %%mm2\n"
        "pxor %%mm3, %%mm3\n"
        "pxor %%mm1, %%mm1\n"
        "pcmpgtw %%mm0, %%mm3\n\t"
        "pcmpgtw %%mm2, %%mm1\n\t"
        "pxor %%mm3, %%mm0\n"
        "pxor %%mm1, %%mm2\n"
        "psubw %%mm3, %%mm0\n"
        "psubw %%mm1, %%mm2\n"
        "paddw %%mm0, %%mm2\n"
        "paddw %%mm2, %%mm6\n"

        "movq %%mm0, %%mm2\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm0\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm2\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm0\n"
        "psubw %%mm3, %%mm2\n"
        "psubw %%mm0, %%mm4\n"
        "psubw %%mm2, %%mm5\n"
        "pxor %%mm3, %%mm3\n"
        "pxor %%mm1, %%mm1\n"
        "pcmpgtw %%mm4, %%mm3\n\t"
        "pcmpgtw %%mm5, %%mm1\n\t"
        "pxor %%mm3, %%mm4\n"
        "pxor %%mm1, %%mm5\n"
        "psubw %%mm3, %%mm4\n"
        "psubw %%mm1, %%mm5\n"
        "paddw %%mm4, %%mm5\n"
        "paddw %%mm5, %%mm6\n"

        "movq %%mm4, %%mm5\n"
        "movq %%mm1, %%mm3\n"
        "punpcklbw %%mm7,%%mm4\n"
        "punpcklbw %%mm7,%%mm1\n"
        "punpckhbw %%mm7,%%mm5\n"
        "punpckhbw %%mm7,%%mm3\n"
        "psubw %%mm1, %%mm4\n"
        "psubw %%mm3, %%mm5\n"
        "psubw %%mm4, %%mm0\n"
        "psubw %%mm5, %%mm2\n"
        "pxor %%mm3, %%mm3\n"
        "pxor %%mm1, %%mm1\n"
        "pcmpgtw %%mm0, %%mm3\n\t"
        "pcmpgtw %%mm2, %%mm1\n\t"
        "pxor %%mm3, %%mm0\n"
        "pxor %%mm1, %%mm2\n"
        "psubw %%mm3, %%mm0\n"
        "psubw %%mm1, %%mm2\n"
        "paddw %%mm0, %%mm2\n"
        "paddw %%mm2, %%mm6\n"

        "movq %%mm6, %%mm0\n"
        "punpcklwd %%mm7,%%mm0\n"
        "punpckhwd %%mm7,%%mm6\n"
        "paddd %%mm0, %%mm6\n"

        "paddd %%mm6,%%mm0\n"
        : "+r" (pix1), "=r" (tmp)
        : "r" ((x86_reg) line_size), "g" (h - 2));

    return tmp + hf_noise8_mmx(pix1 + 8, line_size, h);
}
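/*
 * Rough scalar picture of what hf_noise8_mmx()/hf_noise16_mmx() measure:
 * the sum of absolute second-order differences (the horizontal gradient of
 * one row minus the horizontal gradient of the next row), i.e. the
 * high-frequency "noise" energy of the block.  Sketch only; the helper name
 * and exact loop bounds are illustrative, not taken from this file.
 */
static inline int hf_noise_scalar_ref(const uint8_t *pix, int line_size,
                                      int w, int h)
{
    int x, y, sum = 0;

    for (y = 0; y + 1 < h; y++) {
        for (x = 0; x + 1 < w; x++)
            sum += FFABS(  pix[x]     - pix[x     + line_size]
                         - pix[x + 1] + pix[x + 1 + line_size]);
        pix += line_size;
    }
    return sum;
}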
static int nsse16_mmx(void *p, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    MpegEncContext *c = p;
    int score1, score2;

    if (c)
        score1 = c->dsp.sse[0](c, pix1, pix2, line_size, h);
    else
        score1 = sse16_mmx(c, pix1, pix2, line_size, h);
    score2 = hf_noise16_mmx(pix1, line_size, h) -
             hf_noise16_mmx(pix2, line_size, h);

    if (c) return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else   return score1 + FFABS(score2) * 8;
}
static int nsse8_mmx(void *p, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    MpegEncContext *c = p;
    int score1 = sse8_mmx(c, pix1, pix2, line_size, h);
    int score2 = hf_noise8_mmx(pix1, line_size, h) -
                 hf_noise8_mmx(pix2, line_size, h);

    if (c) return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else   return score1 + FFABS(score2) * 8;
}
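/*
 * Note on the NSSE comparators above: the final score is the plain SSE of
 * the two blocks plus a penalty proportional to the difference in their
 * high-frequency noise, i.e.
 *     score = sse + |hf_noise(pix1) - hf_noise(pix2)| * nsse_weight
 * with nsse_weight taken from the encoder context (8 when no context is
 * available).
 */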
/* Vertical SAD of one 16-pixel-wide block against itself shifted by one line (MMX). */
static int vsad_intra16_mmx(void *v, uint8_t *pix, uint8_t *dummy, int line_size, int h)
{
    int tmp;

#define SUM(in0, in1, out0, out1) \
    "movq (%0), %%mm2\n"\
    "movq 8(%0), %%mm3\n"\
    "movq %%mm2, " #out0 "\n"\
    "movq %%mm3, " #out1 "\n"\
    "psubusb " #in0 ", %%mm2\n"\
    "psubusb " #in1 ", %%mm3\n"\
    "psubusb " #out0 ", " #in0 "\n"\
    "psubusb " #out1 ", " #in1 "\n"\
    "por %%mm2, " #in0 "\n"\
    "por %%mm3, " #in1 "\n"\
    "movq " #in0 ", %%mm2\n"\
    "movq " #in1 ", %%mm3\n"\
    "punpcklbw %%mm7, " #in0 "\n"\
    "punpcklbw %%mm7, " #in1 "\n"\
    "punpckhbw %%mm7, %%mm2\n"\
    "punpckhbw %%mm7, %%mm3\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw %%mm3, %%mm2\n"\
    "paddw %%mm2, " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"

    __asm__ volatile (
        SUM(%%mm4, %%mm5, %%mm0, %%mm1)

        SUM(%%mm0, %%mm1, %%mm4, %%mm5)

        "paddw %%mm6,%%mm0\n"
        "paddw %%mm6,%%mm0\n"
        : "+r" (pix), "=r" (tmp)
        : "r" ((x86_reg) line_size), "m" (h));

    return tmp;
}
#undef SUM
/* Same vertical SAD as above, using psadbw (MMXEXT). */
static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy,
                               int line_size, int h)
{
    int tmp;

#define SUM(in0, in1, out0, out1) \
    "movq (%0), " #out0 "\n"\
    "movq 8(%0), " #out1 "\n"\
    "psadbw " #out0 ", " #in0 "\n"\
    "psadbw " #out1 ", " #in1 "\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"

    __asm__ volatile (
        SUM(%%mm4, %%mm5, %%mm0, %%mm1)

        SUM(%%mm0, %%mm1, %%mm4, %%mm5)
        : "+r" (pix), "=r" (tmp)
        : "r" ((x86_reg) line_size), "m" (h));

    return tmp;
}
#undef SUM
/* Vertical SAD of the prediction error pix1 - pix2 over a 16-pixel-wide block (MMX). */
static int vsad16_mmx(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int tmp;

#define SUM(in0, in1, out0, out1) \
    "movq (%1)," #out0 "\n"\
    "movq 8(%0),%%mm3\n"\
    "movq 8(%1)," #out1 "\n"\
    "psubb " #out0 ", %%mm2\n"\
    "psubb " #out1 ", %%mm3\n"\
    "pxor %%mm7, %%mm2\n"\
    "pxor %%mm7, %%mm3\n"\
    "movq %%mm2, " #out0 "\n"\
    "movq %%mm3, " #out1 "\n"\
    "psubusb " #in0 ", %%mm2\n"\
    "psubusb " #in1 ", %%mm3\n"\
    "psubusb " #out0 ", " #in0 "\n"\
    "psubusb " #out1 ", " #in1 "\n"\
    "por %%mm2, " #in0 "\n"\
    "por %%mm3, " #in1 "\n"\
    "movq " #in0 ", %%mm2\n"\
    "movq " #in1 ", %%mm3\n"\
    "punpcklbw %%mm7, " #in0 "\n"\
    "punpcklbw %%mm7, " #in1 "\n"\
    "punpckhbw %%mm7, %%mm2\n"\
    "punpckhbw %%mm7, %%mm3\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw %%mm3, %%mm2\n"\
    "paddw %%mm2, " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"

    __asm__ volatile (
        "pcmpeqw %%mm7,%%mm7\n"
        "packsswb %%mm7, %%mm7\n"

        "psubb %%mm2, %%mm0\n"
        "psubb %%mm3, %%mm1\n"
        "pxor %%mm7, %%mm0\n"
        "pxor %%mm7, %%mm1\n"

        SUM(%%mm4, %%mm5, %%mm0, %%mm1)

        SUM(%%mm0, %%mm1, %%mm4, %%mm5)

        "paddw %%mm6,%%mm0\n"
        "paddw %%mm6,%%mm0\n"
        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
        : "r" ((x86_reg) line_size), "m" (h));

    return tmp;
}
#undef SUM
/* Same inter vertical SAD, using psadbw (MMXEXT). */
static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
{
    int tmp;

#define SUM(in0, in1, out0, out1) \
    "movq (%0)," #out0 "\n"\
    "movq 8(%0)," #out1 "\n"\
    "movq 8(%1),%%mm3\n"\
    "psubb %%mm2, " #out0 "\n"\
    "psubb %%mm3, " #out1 "\n"\
    "pxor %%mm7, " #out0 "\n"\
    "pxor %%mm7, " #out1 "\n"\
    "psadbw " #out0 ", " #in0 "\n"\
    "psadbw " #out1 ", " #in1 "\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"

    __asm__ volatile (
        "pcmpeqw %%mm7,%%mm7\n"
        "packsswb %%mm7, %%mm7\n"

        "psubb %%mm2, %%mm0\n"
        "psubb %%mm3, %%mm1\n"
        "pxor %%mm7, %%mm0\n"
        "pxor %%mm7, %%mm1\n"

        SUM(%%mm4, %%mm5, %%mm0, %%mm1)

        SUM(%%mm0, %%mm1, %%mm4, %%mm5)
        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
        : "r" ((x86_reg) line_size), "m" (h));

    return tmp;
}
#undef SUM
669 "movq (%2, %0), %%mm0 \n\t" 670 "movq (%1, %0), %%mm1 \n\t" 671 "psubb %%mm0, %%mm1 \n\t" 672 "movq %%mm1, (%3, %0) \n\t" 673 "movq 8(%2, %0), %%mm0 \n\t" 674 "movq 8(%1, %0), %%mm1 \n\t" 675 "psubb %%mm0, %%mm1 \n\t" 676 "movq %%mm1, 8(%3, %0) \n\t" 681 :
"r"(src1),
"r"(src2),
"r"(dst),
"r"((
x86_reg)w-15)
684 dst[i+0] = src1[i+0]-src2[i+0];
static void sub_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *src1,
                                              const uint8_t *src2, int w,
                                              int *left, int *left_top)
{
    x86_reg i = 0;
    uint8_t l, lt;

    __asm__ volatile (
        "movq (%1, %0), %%mm0       \n\t" // LT
        "psllq $8, %%mm0            \n\t"
        "movq (%1, %0), %%mm1       \n\t" // T
        "movq -1(%2, %0), %%mm2     \n\t" // L
        "movq (%2, %0), %%mm3       \n\t" // X
        "movq %%mm2, %%mm4          \n\t" // L
        "psubb %%mm0, %%mm2         \n\t"
        "paddb %%mm1, %%mm2         \n\t" // L + T - LT
        "movq %%mm4, %%mm5          \n\t" // L
        "pmaxub %%mm1, %%mm4        \n\t" // max(T, L)
        "pminub %%mm5, %%mm1        \n\t" // min(T, L)
        "pminub %%mm2, %%mm4        \n\t"
        "pmaxub %%mm1, %%mm4        \n\t" // median
        "psubb %%mm4, %%mm3         \n\t" // dst - pred
        "movq %%mm3, (%3, %0)       \n\t"
        "movq -1(%1, %0), %%mm0     \n\t" // LT
        : "+r" (i)
        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w));

    l  = *left;
    lt = *left_top;

    dst[0] = src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt) & 0xFF);

    *left_top = src1[w - 1];
    *left     = src2[w - 1];
}
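/*
 * Scalar sketch of the median prediction being vectorized above: each output
 * byte is src2[i] minus the median of the left, top and (left + top - topleft)
 * predictors, matching the generic C implementation.  The helper name is
 * illustrative only; FFABS()/mid_pred() come from libavutil headers already
 * pulled in by this file.
 */
static inline void sub_hfyu_median_prediction_scalar_ref(uint8_t *dst,
                                                         const uint8_t *src1,
                                                         const uint8_t *src2,
                                                         int w, int *left,
                                                         int *left_top)
{
    int i;
    uint8_t l  = *left;      /* running "left" predictor      */
    uint8_t lt = *left_top;  /* running "top-left" predictor  */

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF);
        lt = src1[i];        /* current top becomes next top-left */
        l  = src2[i];        /* current pixel becomes next left   */
        dst[i] = l - pred;
    }

    *left     = l;
    *left_top = lt;
}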
#define MMABS_MMX(a,z)\
    "pxor " #z ", " #z " \n\t"\
    "pcmpgtw " #a ", " #z " \n\t"\
    "pxor " #z ", " #a " \n\t"\
    "psubw " #z ", " #a " \n\t"

#define MMABS_MMXEXT(a, z) \
    "pxor " #z ", " #z " \n\t"\
    "psubw " #a ", " #z " \n\t"\
    "pmaxsw " #z ", " #a " \n\t"

#define MMABS_SSSE3(a,z)\
    "pabsw " #a ", " #a " \n\t"

#define MMABS_SUM(a,z, sum)\
    MMABS(a,z)\
    "paddusw " #a ", " #sum " \n\t"

#define HSUM_MMX(a, t, dst)\
    "movq "#a", "#t" \n\t"\
    "psrlq $32, "#a" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "movq "#a", "#t" \n\t"\
    "psrlq $16, "#a" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "movd "#a", "#dst" \n\t"

#define HSUM_MMXEXT(a, t, dst) \
    "pshufw $0x0E, "#a", "#t" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "pshufw $0x01, "#a", "#t" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "movd "#a", "#dst" \n\t"

#define HSUM_SSE2(a, t, dst)\
    "movhlps "#a", "#t" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "pshuflw $0x0E, "#a", "#t" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "pshuflw $0x01, "#a", "#t" \n\t"\
    "paddusw "#t", "#a" \n\t"\
    "movd "#a", "#dst" \n\t"

#define DCT_SAD4(m,mm,o)\
    "mov"#m" "#o"+ 0(%1), "#mm"2 \n\t"\
    "mov"#m" "#o"+16(%1), "#mm"3 \n\t"\
    "mov"#m" "#o"+32(%1), "#mm"4 \n\t"\
    "mov"#m" "#o"+48(%1), "#mm"5 \n\t"\
    MMABS_SUM(mm##2, mm##6, mm##0)\
    MMABS_SUM(mm##3, mm##7, mm##1)\
    MMABS_SUM(mm##4, mm##6, mm##0)\
    MMABS_SUM(mm##5, mm##7, mm##1)

#define DCT_SAD_MMX\
    "pxor %%mm0, %%mm0 \n\t"\
    "pxor %%mm1, %%mm1 \n\t"\
    DCT_SAD4(q, %%mm, 0)\
    DCT_SAD4(q, %%mm, 8)\
    DCT_SAD4(q, %%mm, 64)\
    DCT_SAD4(q, %%mm, 72)\
    "paddusw %%mm1, %%mm0 \n\t"\
    HSUM(%%mm0, %%mm1, %0)

#define DCT_SAD_SSE2\
    "pxor %%xmm0, %%xmm0 \n\t"\
    "pxor %%xmm1, %%xmm1 \n\t"\
    DCT_SAD4(dqa, %%xmm, 0)\
    DCT_SAD4(dqa, %%xmm, 64)\
    "paddusw %%xmm1, %%xmm0 \n\t"\
    HSUM(%%xmm0, %%xmm1, %0)

#define DCT_SAD_FUNC(cpu) \
static int sum_abs_dctelem_##cpu(int16_t *block){\
    int sum;\
    __asm__ volatile(\
        DCT_SAD\
        :"=r"(sum)\
        :"r"(block));\
    return sum&0xFFFF;\
}

#define DCT_SAD       DCT_SAD_MMX
#define HSUM(a,t,dst) HSUM_MMX(a,t,dst)
#define MMABS(a,z)    MMABS_MMX(a,z)
DCT_SAD_FUNC(mmx)
#undef HSUM
#undef MMABS

#define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst)
#define MMABS(a,z)    MMABS_MMXEXT(a,z)
DCT_SAD_FUNC(mmxext)
#undef HSUM
#undef DCT_SAD

#define DCT_SAD       DCT_SAD_SSE2
#define HSUM(a,t,dst) HSUM_SSE2(a,t,dst)
DCT_SAD_FUNC(sse2)
#undef HSUM

#if HAVE_SSSE3_INLINE
#define MMABS(a,z)    MMABS_SSSE3(a,z)
DCT_SAD_FUNC(ssse3)
#undef MMABS
#endif
#undef HSUM
#undef DCT_SAD

static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size)
{
    int sum;
    x86_reg i = size;

    __asm__ volatile (
        "pxor %%mm4, %%mm4 \n"
        "movq (%2,%0), %%mm2 \n"
        "movq (%3,%0,2), %%mm0 \n"
        "movq 8(%3,%0,2), %%mm1 \n"
        "punpckhbw %%mm2, %%mm3 \n"
        "punpcklbw %%mm2, %%mm2 \n"
        "psubw %%mm3, %%mm1 \n"
        "psubw %%mm2, %%mm0 \n"
        "pmaddwd %%mm1, %%mm1 \n"
        "pmaddwd %%mm0, %%mm0 \n"
        "paddd %%mm1, %%mm4 \n"
        "paddd %%mm0, %%mm4 \n"
        "movq %%mm4, %%mm3 \n"
        "psrlq $32, %%mm3 \n"
        "paddd %%mm3, %%mm4 \n"
        "movd %%mm4, %1 \n"
        : "+r" (i), "=r" (sum)
        : "r" (pix1), "r" (pix2));

    return sum;
}
#define PHADDD(a, t)\
    "movq "#a", "#t" \n\t"\
    "psrlq $32, "#a" \n\t"\
    "paddd "#t", "#a" \n\t"

#define PMULHRW(x, y, s, o)\
    "pmulhw " #s ", "#x " \n\t"\
    "pmulhw " #s ", "#y " \n\t"\
    "paddw " #o ", "#x " \n\t"\
    "paddw " #o ", "#y " \n\t"\
    "psraw $1, "#x " \n\t"\
    "psraw $1, "#y " \n\t"
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
#define SCALE_OFFSET 1

/* 3DNow! variants */
#define DEF(x) x ## _3dnow
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o)\
    "pmulhrw " #s ", "#x " \n\t"\
    "pmulhrw " #s ", "#y " \n\t"

#if HAVE_SSSE3_INLINE
#define DEF(x) x ## _ssse3
#define SCALE_OFFSET -1
#define PHADDD(a, t)\
    "pshufw $0x0E, "#a", "#t" \n\t"\
    "paddd "#t", "#a" \n\t"
#define PMULHRW(x, y, s, o)\
    "pmulhrsw " #s ", "#x " \n\t"\
    "pmulhrsw " #s ", "#y " \n\t"
#endif /* HAVE_SSSE3_INLINE */

#define hadamard_func(cpu) \
int ff_hadamard8_diff_##cpu  (void *s, uint8_t *src1, uint8_t *src2, \
                              int stride, int h); \
int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \
                              int stride, int h);

hadamard_func(mmx)
hadamard_func(mmxext)
hadamard_func(sse2)
hadamard_func(ssse3)

void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
{
    int bit_depth = avctx->bits_per_raw_sample;
    const int dct_algo = avctx->dct_algo;

    if (avctx->bits_per_raw_sample <= 8 &&
        (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
        c->fdct = ff_fdct_mmx;

    /* MMX */
    c->diff_bytes      = diff_bytes_mmx;
    c->sum_abs_dctelem = sum_abs_dctelem_mmx;

    c->sse[0]  = sse16_mmx;
    c->sse[1]  = sse8_mmx;
    c->vsad[4] = vsad_intra16_mmx;

    c->nsse[0] = nsse16_mmx;
    c->nsse[1] = nsse8_mmx;

    c->vsad[0] = vsad16_mmx;

    c->try_8x8basis = try_8x8basis_mmx;
    c->add_8x8basis = add_8x8basis_mmx;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;

    /* MMXEXT */
    c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
    c->vsad[4]         = vsad_intra16_mmxext;

    c->vsad[0] = vsad16_mmxext;

    c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;

    /* SSE2 */
    c->sum_abs_dctelem = sum_abs_dctelem_sse2;

#if HAVE_SSSE3_INLINE
    c->try_8x8basis    = try_8x8basis_ssse3;
    c->add_8x8basis    = add_8x8basis_ssse3;
    c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
#endif

    /* 3DNow! */
    c->try_8x8basis = try_8x8basis_3dnow;
    c->add_8x8basis = add_8x8basis_3dnow;

    /* external (yasm) Hadamard transforms */
    c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
    c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;

    c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
    c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;

#if HAVE_ALIGNED_STACK
    c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
    c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
#endif

    c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
    c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
}