34 # define RENAME(a) a ## _C 36 # define TEMPLATE_PP_C 0 39 #ifdef TEMPLATE_PP_ALTIVEC 40 # define RENAME(a) a ## _altivec 42 # define TEMPLATE_PP_ALTIVEC 0 45 #ifdef TEMPLATE_PP_MMX 46 # define RENAME(a) a ## _MMX 48 # define TEMPLATE_PP_MMX 0 51 #ifdef TEMPLATE_PP_MMXEXT 52 # undef TEMPLATE_PP_MMX 53 # define TEMPLATE_PP_MMX 1 54 # define RENAME(a) a ## _MMX2 56 # define TEMPLATE_PP_MMXEXT 0 59 #ifdef TEMPLATE_PP_3DNOW 60 # undef TEMPLATE_PP_MMX 61 # define TEMPLATE_PP_MMX 1 62 # define RENAME(a) a ## _3DNow 64 # define TEMPLATE_PP_3DNOW 0 67 #ifdef TEMPLATE_PP_SSE2 68 # undef TEMPLATE_PP_MMX 69 # define TEMPLATE_PP_MMX 1 70 # undef TEMPLATE_PP_MMXEXT 71 # define TEMPLATE_PP_MMXEXT 1 72 # define RENAME(a) a ## _SSE2 74 # define TEMPLATE_PP_SSE2 0 82 #if TEMPLATE_PP_MMXEXT 83 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" 84 #elif TEMPLATE_PP_3DNOW 85 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" 87 #define PAVGB(a,b) REAL_PAVGB(a,b) 89 #if TEMPLATE_PP_MMXEXT 90 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" 92 #define PMINUB(b,a,t) \ 93 "movq " #a ", " #t " \n\t"\ 94 "psubusb " #b ", " #t " \n\t"\ 95 "psubb " #t ", " #a " \n\t" 98 #if TEMPLATE_PP_MMXEXT 99 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" 100 #elif TEMPLATE_PP_MMX 101 #define PMAXUB(a,b) \ 102 "psubusb " #a ", " #b " \n\t"\ 103 "paddb " #a ", " #b " \n\t" 115 "movq %0, %%mm7 \n\t" 116 "movq %1, %%mm6 \n\t" 117 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
121 "lea (%2, %3), %%"REG_a
" \n\t" 125 "movq (%2), %%mm0 \n\t" 126 "movq (%%"REG_a
"), %%mm1 \n\t" 127 "movq %%mm0, %%mm3 \n\t" 128 "movq %%mm0, %%mm4 \n\t" 130 PMINUB(%%mm1, %%mm3, %%mm5)
131 "psubb %%mm1, %%mm0 \n\t" 132 "paddb %%mm7, %%mm0 \n\t" 133 "pcmpgtb %%mm6, %%mm0 \n\t" 135 "movq (%%"REG_a
",%3), %%mm2 \n\t" 137 PMINUB(%%mm2, %%mm3, %%mm5)
138 "psubb %%mm2, %%mm1 \n\t" 139 "paddb %%mm7, %%mm1 \n\t" 140 "pcmpgtb %%mm6, %%mm1 \n\t" 141 "paddb %%mm1, %%mm0 \n\t" 143 "movq (%%"REG_a
", %3, 2), %%mm1 \n\t" 145 PMINUB(%%mm1, %%mm3, %%mm5)
146 "psubb %%mm1, %%mm2 \n\t" 147 "paddb %%mm7, %%mm2 \n\t" 148 "pcmpgtb %%mm6, %%mm2 \n\t" 149 "paddb %%mm2, %%mm0 \n\t" 151 "lea (%%"REG_a
", %3, 4), %%"REG_a
" \n\t" 153 "movq (%2, %3, 4), %%mm2 \n\t" 155 PMINUB(%%mm2, %%mm3, %%mm5)
156 "psubb %%mm2, %%mm1 \n\t" 157 "paddb %%mm7, %%mm1 \n\t" 158 "pcmpgtb %%mm6, %%mm1 \n\t" 159 "paddb %%mm1, %%mm0 \n\t" 161 "movq (%%"REG_a
"), %%mm1 \n\t" 163 PMINUB(%%mm1, %%mm3, %%mm5)
164 "psubb %%mm1, %%mm2 \n\t" 165 "paddb %%mm7, %%mm2 \n\t" 166 "pcmpgtb %%mm6, %%mm2 \n\t" 167 "paddb %%mm2, %%mm0 \n\t" 169 "movq (%%"REG_a
", %3), %%mm2 \n\t" 171 PMINUB(%%mm2, %%mm3, %%mm5)
172 "psubb %%mm2, %%mm1 \n\t" 173 "paddb %%mm7, %%mm1 \n\t" 174 "pcmpgtb %%mm6, %%mm1 \n\t" 175 "paddb %%mm1, %%mm0 \n\t" 177 "movq (%%"REG_a
", %3, 2), %%mm1 \n\t" 179 PMINUB(%%mm1, %%mm3, %%mm5)
180 "psubb %%mm1, %%mm2 \n\t" 181 "paddb %%mm7, %%mm2 \n\t" 182 "pcmpgtb %%mm6, %%mm2 \n\t" 183 "paddb %%mm2, %%mm0 \n\t" 184 "psubusb %%mm3, %%mm4 \n\t" 187 #if TEMPLATE_PP_MMXEXT 188 "pxor %%mm7, %%mm7 \n\t" 189 "psadbw %%mm7, %%mm0 \n\t" 191 "movq %%mm0, %%mm1 \n\t" 192 "psrlw $8, %%mm0 \n\t" 193 "paddb %%mm1, %%mm0 \n\t" 194 "movq %%mm0, %%mm1 \n\t" 195 "psrlq $16, %%mm0 \n\t" 196 "paddb %%mm1, %%mm0 \n\t" 197 "movq %%mm0, %%mm1 \n\t" 198 "psrlq $32, %%mm0 \n\t" 199 "paddb %%mm1, %%mm0 \n\t" 201 "movq %4, %%mm7 \n\t" 202 "paddusb %%mm7, %%mm7 \n\t" 203 "psubusb %%mm7, %%mm4 \n\t" 204 "packssdw %%mm4, %%mm4 \n\t" 205 "movd %%mm0, %0 \n\t" 206 "movd %%mm4, %1 \n\t" 208 :
"=r" (numEq),
"=r" (dcOk)
213 numEq= (-numEq) &0xFF;
214 if(numEq >
c->ppMode.flatnessThreshold){
221 #endif //TEMPLATE_PP_MMX 227 #if !TEMPLATE_PP_ALTIVEC 230 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 233 "movq %2, %%mm0 \n\t" 234 "pxor %%mm4, %%mm4 \n\t" 236 "movq (%0), %%mm6 \n\t" 237 "movq (%0, %1), %%mm5 \n\t" 238 "movq %%mm5, %%mm1 \n\t" 239 "movq %%mm6, %%mm2 \n\t" 240 "psubusb %%mm6, %%mm5 \n\t" 241 "psubusb %%mm1, %%mm2 \n\t" 242 "por %%mm5, %%mm2 \n\t" 243 "psubusb %%mm0, %%mm2 \n\t" 244 "pcmpeqb %%mm4, %%mm2 \n\t" 246 "pand %%mm2, %%mm6 \n\t" 247 "pandn %%mm1, %%mm2 \n\t" 248 "por %%mm2, %%mm6 \n\t" 250 "movq (%0, %1, 8), %%mm5 \n\t" 251 "lea (%0, %1, 4), %%"REG_a
" \n\t" 252 "lea (%0, %1, 8), %%"REG_c
" \n\t" 253 "sub %1, %%"REG_c
" \n\t" 255 "movq (%0, %1, 8), %%mm7 \n\t" 256 "movq %%mm5, %%mm1 \n\t" 257 "movq %%mm7, %%mm2 \n\t" 258 "psubusb %%mm7, %%mm5 \n\t" 259 "psubusb %%mm1, %%mm2 \n\t" 260 "por %%mm5, %%mm2 \n\t" 261 "psubusb %%mm0, %%mm2 \n\t" 262 "pcmpeqb %%mm4, %%mm2 \n\t" 264 "pand %%mm2, %%mm7 \n\t" 265 "pandn %%mm1, %%mm2 \n\t" 266 "por %%mm2, %%mm7 \n\t" 275 "movq (%0, %1), %%mm0 \n\t" 276 "movq %%mm0, %%mm1 \n\t" 280 "movq (%0, %1, 4), %%mm2 \n\t" 281 "movq %%mm2, %%mm5 \n\t" 282 PAVGB((%%REGa), %%mm2)
283 PAVGB((%0, %1, 2), %%mm2)
284 "movq %%mm2, %%mm3 \n\t" 285 "movq (%0), %%mm4 \n\t" 288 "movq %%mm3, (%0) \n\t" 290 "movq %%mm1, %%mm0 \n\t" 292 "movq %%mm4, %%mm3 \n\t" 293 PAVGB((%0,%1,2), %%mm3)
294 PAVGB((%%REGa,%1,2), %%mm5)
295 PAVGB((%%REGa), %%mm5)
298 "movq %%mm3, (%0,%1) \n\t" 301 "movq (%%"REG_c
"), %%mm0 \n\t" 302 PAVGB((%%REGa, %1, 2), %%mm0)
303 "movq %%mm0, %%mm3 \n\t" 307 "movq (%0, %1, 2), %%mm2 \n\t" 308 "movq %%mm0, (%0, %1, 2) \n\t" 310 "movq (%%"REG_a
", %1, 4), %%mm0 \n\t" 311 PAVGB((%%REGc), %%mm0)
317 "movq (%%"REG_a
"), %%mm5 \n\t" 318 "movq %%mm6, (%%"REG_a
") \n\t" 320 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t" 325 "movq (%0, %1, 4), %%mm4 \n\t" 328 "movq %%mm6, (%0, %1, 4) \n\t" 333 "movq (%%"REG_a
", %1, 2), %%mm6 \n\t" 336 "movq %%mm1, (%%"REG_a
", %1, 2) \n\t" 338 PAVGB((%%REGc), %%mm2)
339 "movq (%%"REG_a
", %1, 4), %%mm0 \n\t" 343 "movq %%mm6, (%%"REG_c
") \n\t" 350 "movq %%mm5, (%%"REG_a
", %1, 4) \n\t" 357 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 359 const int l2=
stride + l1;
360 const int l3=
stride + l2;
361 const int l4=
stride + l3;
362 const int l5=
stride + l4;
363 const int l6=
stride + l5;
364 const int l7=
stride + l6;
365 const int l8=
stride + l7;
366 const int l9=
stride + l8;
374 sums[0] = 4*first +
src[l1] +
src[l2] +
src[l3] + 4;
375 sums[1] = sums[0] - first + src[l4];
376 sums[2] = sums[1] - first + src[l5];
377 sums[3] = sums[2] - first + src[l6];
378 sums[4] = sums[3] - first + src[l7];
379 sums[5] = sums[4] - src[l1] + src[l8];
380 sums[6] = sums[5] - src[l2] + last;
381 sums[7] = sums[6] - src[l3] + last;
382 sums[8] = sums[7] - src[l4] + last;
383 sums[9] = sums[8] - src[l5] + last;
385 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
386 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
387 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
388 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
389 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
390 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
391 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
392 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
396 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 398 #endif //TEMPLATE_PP_ALTIVEC 409 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 413 "pxor %%mm7, %%mm7 \n\t" 414 "lea (%0, %1), %%"REG_a
" \n\t" 415 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t" 418 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t" 419 "movq (%0, %1, 4), %%mm1 \n\t" 420 "movq %%mm1, %%mm2 \n\t" 421 "psubusb %%mm0, %%mm1 \n\t" 422 "psubusb %%mm2, %%mm0 \n\t" 423 "por %%mm1, %%mm0 \n\t" 424 "movq (%%"REG_c
"), %%mm3 \n\t" 425 "movq (%%"REG_c
", %1), %%mm4 \n\t" 426 "movq %%mm3, %%mm5 \n\t" 427 "psubusb %%mm4, %%mm3 \n\t" 428 "psubusb %%mm5, %%mm4 \n\t" 429 "por %%mm4, %%mm3 \n\t" 431 "movq %%mm2, %%mm1 \n\t" 432 "psubusb %%mm5, %%mm2 \n\t" 433 "movq %%mm2, %%mm4 \n\t" 434 "pcmpeqb %%mm7, %%mm2 \n\t" 435 "psubusb %%mm1, %%mm5 \n\t" 436 "por %%mm5, %%mm4 \n\t" 437 "psubusb %%mm0, %%mm4 \n\t" 438 "movq %%mm4, %%mm3 \n\t" 439 "movq %2, %%mm0 \n\t" 440 "paddusb %%mm0, %%mm0 \n\t" 441 "psubusb %%mm0, %%mm4 \n\t" 442 "pcmpeqb %%mm7, %%mm4 \n\t" 443 "psubusb "MANGLE(b01)
", %%mm3 \n\t" 444 "pand %%mm4, %%mm3 \n\t" 447 "movq %%mm3, %%mm1 \n\t" 451 "movq (%0, %1, 4), %%mm0 \n\t" 452 "pxor %%mm2, %%mm0 \n\t" 453 "psubusb %%mm3, %%mm0 \n\t" 454 "pxor %%mm2, %%mm0 \n\t" 455 "movq %%mm0, (%0, %1, 4) \n\t" 457 "movq (%%"REG_c
"), %%mm0 \n\t" 458 "pxor %%mm2, %%mm0 \n\t" 459 "paddusb %%mm3, %%mm0 \n\t" 460 "pxor %%mm2, %%mm0 \n\t" 461 "movq %%mm0, (%%"REG_c
") \n\t" 465 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t" 466 "pxor %%mm2, %%mm0 \n\t" 467 "psubusb %%mm1, %%mm0 \n\t" 468 "pxor %%mm2, %%mm0 \n\t" 469 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t" 471 "movq (%%"REG_c
", %1), %%mm0 \n\t" 472 "pxor %%mm2, %%mm0 \n\t" 473 "paddusb %%mm1, %%mm0 \n\t" 474 "pxor %%mm2, %%mm0 \n\t" 475 "movq %%mm0, (%%"REG_c
", %1) \n\t" 479 "movq (%%"REG_a
", %1), %%mm0 \n\t" 480 "pxor %%mm2, %%mm0 \n\t" 481 "psubusb %%mm1, %%mm0 \n\t" 482 "pxor %%mm2, %%mm0 \n\t" 483 "movq %%mm0, (%%"REG_a
", %1) \n\t" 485 "movq (%%"REG_c
", %1, 2), %%mm0 \n\t" 486 "pxor %%mm2, %%mm0 \n\t" 487 "paddusb %%mm1, %%mm0 \n\t" 488 "pxor %%mm2, %%mm0 \n\t" 489 "movq %%mm0, (%%"REG_c
", %1, 2) \n\t" 495 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 498 const int l2=
stride + l1;
499 const int l3=
stride + l2;
500 const int l4=
stride + l3;
501 const int l5=
stride + l4;
502 const int l6=
stride + l5;
503 const int l7=
stride + l6;
511 int b= src[l4] - src[l5];
512 int c= src[l5] - src[l6];
529 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 532 #if !TEMPLATE_PP_ALTIVEC 535 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 553 #if 0 //slightly more accurate and slightly slower 554 "pxor %%mm7, %%mm7 \n\t" 555 "lea (%0, %1), %%"REG_a
" \n\t" 556 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t" 562 "movq (%0, %1, 2), %%mm0 \n\t" 563 "movq (%0), %%mm1 \n\t" 564 "movq %%mm0, %%mm2 \n\t" 569 "movq (%%"REG_a
"), %%mm1 \n\t" 570 "movq (%%"REG_a
", %1, 2), %%mm3 \n\t" 571 "movq %%mm1, %%mm4 \n\t" 576 "movq %%mm0, %%mm4 \n\t" 577 "psubusb %%mm1, %%mm0 \n\t" 578 "psubusb %%mm4, %%mm1 \n\t" 579 "por %%mm0, %%mm1 \n\t" 582 "movq (%0, %1, 4), %%mm0 \n\t" 583 "movq %%mm0, %%mm4 \n\t" 588 "movq (%%"REG_c
"), %%mm2 \n\t" 589 "movq %%mm3, %%mm5 \n\t" 594 "movq %%mm0, %%mm6 \n\t" 595 "psubusb %%mm3, %%mm0 \n\t" 596 "psubusb %%mm6, %%mm3 \n\t" 597 "por %%mm0, %%mm3 \n\t" 598 "pcmpeqb %%mm7, %%mm0 \n\t" 601 "movq (%%"REG_c
", %1), %%mm6 \n\t" 602 "movq %%mm6, %%mm5 \n\t" 607 "movq (%%"REG_c
", %1, 2), %%mm5 \n\t" 608 "movq %%mm2, %%mm4 \n\t" 613 "movq %%mm6, %%mm4 \n\t" 614 "psubusb %%mm2, %%mm6 \n\t" 615 "psubusb %%mm4, %%mm2 \n\t" 616 "por %%mm6, %%mm2 \n\t" 620 PMINUB(%%mm2, %%mm1, %%mm4)
621 "movq %2, %%mm4 \n\t" 622 "paddusb "MANGLE(b01)
", %%mm4 \n\t" 623 "pcmpgtb %%mm3, %%mm4 \n\t" 624 "psubusb %%mm1, %%mm3 \n\t" 625 "pand %%mm4, %%mm3 \n\t" 627 "movq %%mm3, %%mm1 \n\t" 631 "paddusb %%mm1, %%mm3 \n\t" 634 "movq (%%"REG_a
", %1, 2), %%mm6 \n\t" 635 "movq (%0, %1, 4), %%mm5 \n\t" 636 "movq (%0, %1, 4), %%mm4 \n\t" 637 "psubusb %%mm6, %%mm5 \n\t" 638 "psubusb %%mm4, %%mm6 \n\t" 639 "por %%mm6, %%mm5 \n\t" 640 "pcmpeqb %%mm7, %%mm6 \n\t" 641 "pxor %%mm6, %%mm0 \n\t" 642 "pand %%mm0, %%mm3 \n\t" 643 PMINUB(%%mm5, %%mm3, %%mm0)
645 "psubusb "MANGLE(b01)
", %%mm3 \n\t" 648 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t" 649 "movq (%0, %1, 4), %%mm2 \n\t" 650 "pxor %%mm6, %%mm0 \n\t" 651 "pxor %%mm6, %%mm2 \n\t" 652 "psubb %%mm3, %%mm0 \n\t" 653 "paddb %%mm3, %%mm2 \n\t" 654 "pxor %%mm6, %%mm0 \n\t" 655 "pxor %%mm6, %%mm2 \n\t" 656 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t" 657 "movq %%mm2, (%0, %1, 4) \n\t" 660 "lea (%0, %1), %%"REG_a
" \n\t" 661 "pcmpeqb %%mm6, %%mm6 \n\t" 667 "movq (%%"REG_a
", %1, 2), %%mm1 \n\t" 668 "movq (%0, %1, 4), %%mm0 \n\t" 669 "pxor %%mm6, %%mm1 \n\t" 673 "movq (%%"REG_a
", %1, 4), %%mm2 \n\t" 674 "movq (%%"REG_a
", %1), %%mm3 \n\t" 675 "pxor %%mm6, %%mm2 \n\t" 676 "movq %%mm2, %%mm5 \n\t" 677 "movq "MANGLE(b80)
", %%mm4 \n\t" 678 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t" 685 "movq (%%"REG_a
"), %%mm2 \n\t" 686 "pxor %%mm6, %%mm2 \n\t" 689 "movq "MANGLE(b80)
", %%mm3 \n\t" 695 PAVGB((%%REGc, %1), %%mm5)
696 "movq (%%"REG_c
", %1, 2), %%mm1 \n\t" 697 "pxor %%mm6, %%mm1 \n\t" 698 PAVGB((%0, %1, 4), %%mm1)
699 "movq "MANGLE(b80)
", %%mm2 \n\t" 705 "movq "MANGLE(b00)
", %%mm1 \n\t" 706 "movq "MANGLE(b00)
", %%mm5 \n\t" 707 "psubb %%mm2, %%mm1 \n\t" 708 "psubb %%mm3, %%mm5 \n\t" 711 PMINUB(%%mm2, %%mm3, %%mm1)
715 "movq "MANGLE(b00)
", %%mm7 \n\t" 716 "movq %2, %%mm2 \n\t" 718 "psubb %%mm6, %%mm2 \n\t" 720 "movq %%mm4, %%mm1 \n\t" 721 "pcmpgtb %%mm7, %%mm1 \n\t" 722 "pxor %%mm1, %%mm4 \n\t" 723 "psubb %%mm1, %%mm4 \n\t" 724 "pcmpgtb %%mm4, %%mm2 \n\t" 725 "psubusb %%mm3, %%mm4 \n\t" 728 "movq %%mm4, %%mm3 \n\t" 729 "psubusb "MANGLE(b01)
", %%mm4 \n\t" 732 "paddb %%mm3, %%mm4 \n\t" 733 "pand %%mm2, %%mm4 \n\t" 735 "movq "MANGLE(b80)
", %%mm5 \n\t" 736 "psubb %%mm0, %%mm5 \n\t" 737 "paddsb %%mm6, %%mm5 \n\t" 738 "pcmpgtb %%mm5, %%mm7 \n\t" 739 "pxor %%mm7, %%mm5 \n\t" 741 PMINUB(%%mm5, %%mm4, %%mm3)
742 "pxor %%mm1, %%mm7 \n\t" 744 "pand %%mm7, %%mm4 \n\t" 745 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t" 746 "movq (%0, %1, 4), %%mm2 \n\t" 747 "pxor %%mm1, %%mm0 \n\t" 748 "pxor %%mm1, %%mm2 \n\t" 749 "paddb %%mm4, %%mm0 \n\t" 750 "psubb %%mm4, %%mm2 \n\t" 751 "pxor %%mm1, %%mm0 \n\t" 752 "pxor %%mm1, %%mm2 \n\t" 753 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t" 754 "movq %%mm2, (%0, %1, 4) \n\t" 816 #elif TEMPLATE_PP_MMX 820 "pxor %%mm7, %%mm7 \n\t" 825 "movq (%0), %%mm0 \n\t" 826 "movq %%mm0, %%mm1 \n\t" 827 "punpcklbw %%mm7, %%mm0 \n\t" 828 "punpckhbw %%mm7, %%mm1 \n\t" 830 "movq (%0, %1), %%mm2 \n\t" 831 "lea (%0, %1, 2), %%"REG_a
" \n\t" 832 "movq %%mm2, %%mm3 \n\t" 833 "punpcklbw %%mm7, %%mm2 \n\t" 834 "punpckhbw %%mm7, %%mm3 \n\t" 836 "movq (%%"REG_a
"), %%mm4 \n\t" 837 "movq %%mm4, %%mm5 \n\t" 838 "punpcklbw %%mm7, %%mm4 \n\t" 839 "punpckhbw %%mm7, %%mm5 \n\t" 841 "paddw %%mm0, %%mm0 \n\t" 842 "paddw %%mm1, %%mm1 \n\t" 843 "psubw %%mm4, %%mm2 \n\t" 844 "psubw %%mm5, %%mm3 \n\t" 845 "psubw %%mm2, %%mm0 \n\t" 846 "psubw %%mm3, %%mm1 \n\t" 848 "psllw $2, %%mm2 \n\t" 849 "psllw $2, %%mm3 \n\t" 850 "psubw %%mm2, %%mm0 \n\t" 851 "psubw %%mm3, %%mm1 \n\t" 853 "movq (%%"REG_a
", %1), %%mm2 \n\t" 854 "movq %%mm2, %%mm3 \n\t" 855 "punpcklbw %%mm7, %%mm2 \n\t" 856 "punpckhbw %%mm7, %%mm3 \n\t" 858 "psubw %%mm2, %%mm0 \n\t" 859 "psubw %%mm3, %%mm1 \n\t" 860 "psubw %%mm2, %%mm0 \n\t" 861 "psubw %%mm3, %%mm1 \n\t" 862 "movq %%mm0, (%3) \n\t" 863 "movq %%mm1, 8(%3) \n\t" 865 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t" 866 "movq %%mm0, %%mm1 \n\t" 867 "punpcklbw %%mm7, %%mm0 \n\t" 868 "punpckhbw %%mm7, %%mm1 \n\t" 870 "psubw %%mm0, %%mm2 \n\t" 871 "psubw %%mm1, %%mm3 \n\t" 872 "movq %%mm2, 16(%3) \n\t" 873 "movq %%mm3, 24(%3) \n\t" 874 "paddw %%mm4, %%mm4 \n\t" 875 "paddw %%mm5, %%mm5 \n\t" 876 "psubw %%mm2, %%mm4 \n\t" 877 "psubw %%mm3, %%mm5 \n\t" 879 "lea (%%"REG_a
", %1), %0 \n\t" 880 "psllw $2, %%mm2 \n\t" 881 "psllw $2, %%mm3 \n\t" 882 "psubw %%mm2, %%mm4 \n\t" 883 "psubw %%mm3, %%mm5 \n\t" 885 "movq (%0, %1, 2), %%mm2 \n\t" 886 "movq %%mm2, %%mm3 \n\t" 887 "punpcklbw %%mm7, %%mm2 \n\t" 888 "punpckhbw %%mm7, %%mm3 \n\t" 889 "psubw %%mm2, %%mm4 \n\t" 890 "psubw %%mm3, %%mm5 \n\t" 891 "psubw %%mm2, %%mm4 \n\t" 892 "psubw %%mm3, %%mm5 \n\t" 894 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t" 895 "punpcklbw %%mm7, %%mm6 \n\t" 896 "psubw %%mm6, %%mm2 \n\t" 897 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t" 898 "punpckhbw %%mm7, %%mm6 \n\t" 899 "psubw %%mm6, %%mm3 \n\t" 901 "paddw %%mm0, %%mm0 \n\t" 902 "paddw %%mm1, %%mm1 \n\t" 903 "psubw %%mm2, %%mm0 \n\t" 904 "psubw %%mm3, %%mm1 \n\t" 906 "psllw $2, %%mm2 \n\t" 907 "psllw $2, %%mm3 \n\t" 908 "psubw %%mm2, %%mm0 \n\t" 909 "psubw %%mm3, %%mm1 \n\t" 911 "movq (%0, %1, 4), %%mm2 \n\t" 912 "movq %%mm2, %%mm3 \n\t" 913 "punpcklbw %%mm7, %%mm2 \n\t" 914 "punpckhbw %%mm7, %%mm3 \n\t" 916 "paddw %%mm2, %%mm2 \n\t" 917 "paddw %%mm3, %%mm3 \n\t" 918 "psubw %%mm2, %%mm0 \n\t" 919 "psubw %%mm3, %%mm1 \n\t" 921 "movq (%3), %%mm2 \n\t" 922 "movq 8(%3), %%mm3 \n\t" 924 #if TEMPLATE_PP_MMXEXT 925 "movq %%mm7, %%mm6 \n\t" 926 "psubw %%mm0, %%mm6 \n\t" 927 "pmaxsw %%mm6, %%mm0 \n\t" 928 "movq %%mm7, %%mm6 \n\t" 929 "psubw %%mm1, %%mm6 \n\t" 930 "pmaxsw %%mm6, %%mm1 \n\t" 931 "movq %%mm7, %%mm6 \n\t" 932 "psubw %%mm2, %%mm6 \n\t" 933 "pmaxsw %%mm6, %%mm2 \n\t" 934 "movq %%mm7, %%mm6 \n\t" 935 "psubw %%mm3, %%mm6 \n\t" 936 "pmaxsw %%mm6, %%mm3 \n\t" 938 "movq %%mm7, %%mm6 \n\t" 939 "pcmpgtw %%mm0, %%mm6 \n\t" 940 "pxor %%mm6, %%mm0 \n\t" 941 "psubw %%mm6, %%mm0 \n\t" 942 "movq %%mm7, %%mm6 \n\t" 943 "pcmpgtw %%mm1, %%mm6 \n\t" 944 "pxor %%mm6, %%mm1 \n\t" 945 "psubw %%mm6, %%mm1 \n\t" 946 "movq %%mm7, %%mm6 \n\t" 947 "pcmpgtw %%mm2, %%mm6 \n\t" 948 "pxor %%mm6, %%mm2 \n\t" 949 "psubw %%mm6, %%mm2 \n\t" 950 "movq %%mm7, %%mm6 \n\t" 951 "pcmpgtw %%mm3, %%mm6 \n\t" 952 "pxor %%mm6, %%mm3 \n\t" 953 "psubw %%mm6, %%mm3 \n\t" 956 #if TEMPLATE_PP_MMXEXT 957 "pminsw %%mm2, %%mm0 \n\t" 958 "pminsw %%mm3, %%mm1 \n\t" 960 "movq %%mm0, %%mm6 \n\t" 961 "psubusw %%mm2, %%mm6 \n\t" 962 "psubw %%mm6, %%mm0 \n\t" 963 "movq %%mm1, %%mm6 \n\t" 964 "psubusw %%mm3, %%mm6 \n\t" 965 "psubw %%mm6, %%mm1 \n\t" 968 "movd %2, %%mm2 \n\t" 969 "punpcklbw %%mm7, %%mm2 \n\t" 971 "movq %%mm7, %%mm6 \n\t" 972 "pcmpgtw %%mm4, %%mm6 \n\t" 973 "pxor %%mm6, %%mm4 \n\t" 974 "psubw %%mm6, %%mm4 \n\t" 975 "pcmpgtw %%mm5, %%mm7 \n\t" 976 "pxor %%mm7, %%mm5 \n\t" 977 "psubw %%mm7, %%mm5 \n\t" 979 "psllw $3, %%mm2 \n\t" 980 "movq %%mm2, %%mm3 \n\t" 981 "pcmpgtw %%mm4, %%mm2 \n\t" 982 "pcmpgtw %%mm5, %%mm3 \n\t" 983 "pand %%mm2, %%mm4 \n\t" 984 "pand %%mm3, %%mm5 \n\t" 987 "psubusw %%mm0, %%mm4 \n\t" 988 "psubusw %%mm1, %%mm5 \n\t" 991 "movq "MANGLE(w05)
", %%mm2 \n\t" 992 "pmullw %%mm2, %%mm4 \n\t" 993 "pmullw %%mm2, %%mm5 \n\t" 994 "movq "MANGLE(w20)
", %%mm2 \n\t" 995 "paddw %%mm2, %%mm4 \n\t" 996 "paddw %%mm2, %%mm5 \n\t" 997 "psrlw $6, %%mm4 \n\t" 998 "psrlw $6, %%mm5 \n\t" 1000 "movq 16(%3), %%mm0 \n\t" 1001 "movq 24(%3), %%mm1 \n\t" 1003 "pxor %%mm2, %%mm2 \n\t" 1004 "pxor %%mm3, %%mm3 \n\t" 1006 "pcmpgtw %%mm0, %%mm2 \n\t" 1007 "pcmpgtw %%mm1, %%mm3 \n\t" 1008 "pxor %%mm2, %%mm0 \n\t" 1009 "pxor %%mm3, %%mm1 \n\t" 1010 "psubw %%mm2, %%mm0 \n\t" 1011 "psubw %%mm3, %%mm1 \n\t" 1012 "psrlw $1, %%mm0 \n\t" 1013 "psrlw $1, %%mm1 \n\t" 1015 "pxor %%mm6, %%mm2 \n\t" 1016 "pxor %%mm7, %%mm3 \n\t" 1017 "pand %%mm2, %%mm4 \n\t" 1018 "pand %%mm3, %%mm5 \n\t" 1020 #if TEMPLATE_PP_MMXEXT 1021 "pminsw %%mm0, %%mm4 \n\t" 1022 "pminsw %%mm1, %%mm5 \n\t" 1024 "movq %%mm4, %%mm2 \n\t" 1025 "psubusw %%mm0, %%mm2 \n\t" 1026 "psubw %%mm2, %%mm4 \n\t" 1027 "movq %%mm5, %%mm2 \n\t" 1028 "psubusw %%mm1, %%mm2 \n\t" 1029 "psubw %%mm2, %%mm5 \n\t" 1031 "pxor %%mm6, %%mm4 \n\t" 1032 "pxor %%mm7, %%mm5 \n\t" 1033 "psubw %%mm6, %%mm4 \n\t" 1034 "psubw %%mm7, %%mm5 \n\t" 1035 "packsswb %%mm5, %%mm4 \n\t" 1036 "movq (%0), %%mm0 \n\t" 1037 "paddb %%mm4, %%mm0 \n\t" 1038 "movq %%mm0, (%0) \n\t" 1039 "movq (%0, %1), %%mm0 \n\t" 1040 "psubb %%mm4, %%mm0 \n\t" 1041 "movq %%mm0, (%0, %1) \n\t" 1047 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1049 const int l2=
stride + l1;
1050 const int l3=
stride + l2;
1051 const int l4=
stride + l3;
1052 const int l5=
stride + l4;
1053 const int l6=
stride + l5;
1054 const int l7=
stride + l6;
1055 const int l8=
stride + l7;
1060 const int middleEnergy= 5*(
src[l5] -
src[l4]) + 2*(
src[l3] -
src[l6]);
1061 if(
FFABS(middleEnergy) < 8*
c->QP){
1062 const int q=(
src[l4] -
src[l5])/2;
1063 const int leftEnergy= 5*(
src[l3] -
src[l2]) + 2*(
src[l1] -
src[l4]);
1064 const int rightEnergy= 5*(
src[l7] -
src[l6]) + 2*(
src[l5] -
src[l8]);
1070 d*=
FFSIGN(-middleEnergy);
1085 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1087 #endif //TEMPLATE_PP_ALTIVEC 1089 #if !TEMPLATE_PP_ALTIVEC 1092 #if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 1095 "pxor %%mm6, %%mm6 \n\t" 1096 "pcmpeqb %%mm7, %%mm7 \n\t" 1097 "movq %2, %%mm0 \n\t" 1098 "punpcklbw %%mm6, %%mm0 \n\t" 1099 "psrlw $1, %%mm0 \n\t" 1100 "psubw %%mm7, %%mm0 \n\t" 1101 "packuswb %%mm0, %%mm0 \n\t" 1102 "movq %%mm0, %3 \n\t" 1104 "lea (%0, %1), %%"REG_a
" \n\t" 1105 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1110 #undef REAL_FIND_MIN_MAX 1112 #if TEMPLATE_PP_MMXEXT 1113 #define REAL_FIND_MIN_MAX(addr)\ 1114 "movq " #addr ", %%mm0 \n\t"\ 1115 "pminub %%mm0, %%mm7 \n\t"\ 1116 "pmaxub %%mm0, %%mm6 \n\t" 1118 #define REAL_FIND_MIN_MAX(addr)\ 1119 "movq " #addr ", %%mm0 \n\t"\ 1120 "movq %%mm7, %%mm1 \n\t"\ 1121 "psubusb %%mm0, %%mm6 \n\t"\ 1122 "paddb %%mm0, %%mm6 \n\t"\ 1123 "psubusb %%mm0, %%mm1 \n\t"\ 1124 "psubb %%mm1, %%mm7 \n\t" 1126 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) 1128 FIND_MIN_MAX((%%REGa))
1129 FIND_MIN_MAX((%%REGa, %1))
1130 FIND_MIN_MAX((%%REGa, %1, 2))
1131 FIND_MIN_MAX((%0, %1, 4))
1132 FIND_MIN_MAX((%%REGd))
1133 FIND_MIN_MAX((%%REGd, %1))
1134 FIND_MIN_MAX((%%REGd, %1, 2))
1135 FIND_MIN_MAX((%0, %1, 8))
1137 "movq %%mm7, %%mm4 \n\t" 1138 "psrlq $8, %%mm7 \n\t" 1139 #if TEMPLATE_PP_MMXEXT 1140 "pminub %%mm4, %%mm7 \n\t" 1141 "pshufw $0xF9, %%mm7, %%mm4 \n\t" 1142 "pminub %%mm4, %%mm7 \n\t" 1143 "pshufw $0xFE, %%mm7, %%mm4 \n\t" 1144 "pminub %%mm4, %%mm7 \n\t" 1146 "movq %%mm7, %%mm1 \n\t" 1147 "psubusb %%mm4, %%mm1 \n\t" 1148 "psubb %%mm1, %%mm7 \n\t" 1149 "movq %%mm7, %%mm4 \n\t" 1150 "psrlq $16, %%mm7 \n\t" 1151 "movq %%mm7, %%mm1 \n\t" 1152 "psubusb %%mm4, %%mm1 \n\t" 1153 "psubb %%mm1, %%mm7 \n\t" 1154 "movq %%mm7, %%mm4 \n\t" 1155 "psrlq $32, %%mm7 \n\t" 1156 "movq %%mm7, %%mm1 \n\t" 1157 "psubusb %%mm4, %%mm1 \n\t" 1158 "psubb %%mm1, %%mm7 \n\t" 1162 "movq %%mm6, %%mm4 \n\t" 1163 "psrlq $8, %%mm6 \n\t" 1164 #if TEMPLATE_PP_MMXEXT 1165 "pmaxub %%mm4, %%mm6 \n\t" 1166 "pshufw $0xF9, %%mm6, %%mm4 \n\t" 1167 "pmaxub %%mm4, %%mm6 \n\t" 1168 "pshufw $0xFE, %%mm6, %%mm4 \n\t" 1169 "pmaxub %%mm4, %%mm6 \n\t" 1171 "psubusb %%mm4, %%mm6 \n\t" 1172 "paddb %%mm4, %%mm6 \n\t" 1173 "movq %%mm6, %%mm4 \n\t" 1174 "psrlq $16, %%mm6 \n\t" 1175 "psubusb %%mm4, %%mm6 \n\t" 1176 "paddb %%mm4, %%mm6 \n\t" 1177 "movq %%mm6, %%mm4 \n\t" 1178 "psrlq $32, %%mm6 \n\t" 1179 "psubusb %%mm4, %%mm6 \n\t" 1180 "paddb %%mm4, %%mm6 \n\t" 1182 "movq %%mm6, %%mm0 \n\t" 1183 "psubb %%mm7, %%mm6 \n\t" 1185 "movd %%mm6, %k4 \n\t" 1186 "cmpb "MANGLE(deringThreshold)
", %b4 \n\t" 1190 "punpcklbw %%mm7, %%mm7 \n\t" 1191 "punpcklbw %%mm7, %%mm7 \n\t" 1192 "punpcklbw %%mm7, %%mm7 \n\t" 1193 "movq %%mm7, (%4) \n\t" 1195 "movq (%0), %%mm0 \n\t" 1196 "movq %%mm0, %%mm1 \n\t" 1197 "movq %%mm0, %%mm2 \n\t" 1198 "psllq $8, %%mm1 \n\t" 1199 "psrlq $8, %%mm2 \n\t" 1200 "movd -4(%0), %%mm3 \n\t" 1201 "movd 8(%0), %%mm4 \n\t" 1202 "psrlq $24, %%mm3 \n\t" 1203 "psllq $56, %%mm4 \n\t" 1204 "por %%mm3, %%mm1 \n\t" 1205 "por %%mm4, %%mm2 \n\t" 1206 "movq %%mm1, %%mm3 \n\t" 1209 "psubusb %%mm7, %%mm0 \n\t" 1210 "psubusb %%mm7, %%mm2 \n\t" 1211 "psubusb %%mm7, %%mm3 \n\t" 1212 "pcmpeqb "MANGLE(b00)
", %%mm0 \n\t" 1213 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t" 1214 "pcmpeqb "MANGLE(b00)
", %%mm3 \n\t" 1215 "paddb %%mm2, %%mm0 \n\t" 1216 "paddb %%mm3, %%mm0 \n\t" 1218 "movq (%%"REG_a
"), %%mm2 \n\t" 1219 "movq %%mm2, %%mm3 \n\t" 1220 "movq %%mm2, %%mm4 \n\t" 1221 "psllq $8, %%mm3 \n\t" 1222 "psrlq $8, %%mm4 \n\t" 1223 "movd -4(%%"REG_a
"), %%mm5 \n\t" 1224 "movd 8(%%"REG_a
"), %%mm6 \n\t" 1225 "psrlq $24, %%mm5 \n\t" 1226 "psllq $56, %%mm6 \n\t" 1227 "por %%mm5, %%mm3 \n\t" 1228 "por %%mm6, %%mm4 \n\t" 1229 "movq %%mm3, %%mm5 \n\t" 1232 "psubusb %%mm7, %%mm2 \n\t" 1233 "psubusb %%mm7, %%mm4 \n\t" 1234 "psubusb %%mm7, %%mm5 \n\t" 1235 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t" 1236 "pcmpeqb "MANGLE(b00)
", %%mm4 \n\t" 1237 "pcmpeqb "MANGLE(b00)
", %%mm5 \n\t" 1238 "paddb %%mm4, %%mm2 \n\t" 1239 "paddb %%mm5, %%mm2 \n\t" 1241 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1242 "movq " #src ", " #sx " \n\t" \ 1243 "movq " #sx ", " #lx " \n\t" \ 1244 "movq " #sx ", " #t0 " \n\t" \ 1245 "psllq $8, " #lx " \n\t"\ 1246 "psrlq $8, " #t0 " \n\t"\ 1247 "movd -4" #src ", " #t1 " \n\t"\ 1248 "psrlq $24, " #t1 " \n\t"\ 1249 "por " #t1 ", " #lx " \n\t" \ 1250 "movd 8" #src ", " #t1 " \n\t"\ 1251 "psllq $56, " #t1 " \n\t"\ 1252 "por " #t1 ", " #t0 " \n\t" \ 1253 "movq " #lx ", " #t1 " \n\t" \ 1257 "movq " #lx ", 8(%4) \n\t"\ 1258 "movq (%4), " #lx " \n\t"\ 1259 "psubusb " #lx ", " #t1 " \n\t"\ 1260 "psubusb " #lx ", " #t0 " \n\t"\ 1261 "psubusb " #lx ", " #sx " \n\t"\ 1262 "movq "MANGLE(b00)", " #lx " \n\t"\ 1263 "pcmpeqb " #lx ", " #t1 " \n\t" \ 1264 "pcmpeqb " #lx ", " #t0 " \n\t" \ 1265 "pcmpeqb " #lx ", " #sx " \n\t" \ 1266 "paddb " #t1 ", " #t0 " \n\t"\ 1267 "paddb " #t0 ", " #sx " \n\t"\ 1270 "movq " #dst ", " #t0 " \n\t" \ 1271 "movq " #t0 ", " #t1 " \n\t" \ 1272 "psubusb %3, " #t0 " \n\t"\ 1273 "paddusb %3, " #t1 " \n\t"\ 1275 PMINUB(t1, pplx, t0)\ 1276 "paddb " #sx ", " #ppsx " \n\t"\ 1277 "paddb " #psx ", " #ppsx " \n\t"\ 1278 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\ 1279 "pand "MANGLE(b08)", " #ppsx " \n\t"\ 1280 "pcmpeqb " #lx ", " #ppsx " \n\t"\ 1281 "pand " #ppsx ", " #pplx " \n\t"\ 1282 "pandn " #dst ", " #ppsx " \n\t"\ 1283 "por " #pplx ", " #ppsx " \n\t"\ 1284 "movq " #ppsx ", " #dst " \n\t"\ 1285 "movq 8(%4), " #lx " \n\t" 1287 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1288 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) 1305 DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1306 DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1307 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1308 DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1309 DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1310 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1311 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1312 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1316 :
"%"REG_a,
"%"REG_d
1318 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 1325 const int QP2=
c->QP/2 + 1;
1333 if(*p > max) max= *p;
1334 if(*p < min) min= *p;
1337 avg= (min + max + 1)>>1;
1339 if(max - min <deringThreshold)
return;
1341 for(y=0; y<10; y++){
1356 t &= (t<<1) & (t>>1);
1361 int t = s[y-1] & s[
y] & s[y+1];
1375 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
1379 #ifdef DEBUG_DERING_THRESHOLD 1380 __asm__
volatile(
"emms\n\t":);
1382 static long long numPixels=0;
1383 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
1388 static int numSkipped=0;
1389 static int errorSum=0;
1390 static int worstQP=0;
1391 static int worstRange=0;
1392 static int worstDiff=0;
1394 int absDiff=
FFABS(diff);
1395 int error= diff*
diff;
1397 if(x==1 || x==8 || y==1 || y==8)
continue;
1400 if(absDiff > worstDiff){
1403 worstRange= max-
min;
1407 if(1024LL*1024LL*1024LL % numSkipped == 0){
1409 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
1410 (
float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
1411 worstDiff, (
float)numSkipped/numPixels);
1416 if (*p + QP2 < f) *p= *p + QP2;
1417 else if(*p - QP2 > f) *p= *p - QP2;
1422 #ifdef DEBUG_DERING_THRESHOLD 1430 *p =
FFMIN(*p + 20, 255);
1436 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1438 #endif //TEMPLATE_PP_ALTIVEC 1448 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1451 "lea (%0, %1), %%"REG_a
" \n\t" 1452 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t" 1456 "movq (%0), %%mm0 \n\t" 1457 "movq (%%"REG_a
", %1), %%mm1 \n\t" 1459 "movq %%mm0, (%%"REG_a
") \n\t" 1460 "movq (%0, %1, 4), %%mm0 \n\t" 1462 "movq %%mm1, (%%"REG_a
", %1, 2) \n\t" 1463 "movq (%%"REG_c
", %1), %%mm1 \n\t" 1465 "movq %%mm0, (%%"REG_c
") \n\t" 1466 "movq (%0, %1, 8), %%mm0 \n\t" 1468 "movq %%mm1, (%%"REG_c
", %1, 2) \n\t" 1471 :
"%"REG_a,
"%"REG_c
1480 *(uint32_t*)&
src[
stride*1]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1482 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1484 *(uint32_t*)&
src[
stride*5]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1486 *(uint32_t*)&
src[
stride*7]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1501 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1504 "lea (%0, %1), %%"REG_a
" \n\t" 1505 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1506 "lea (%%"REG_d
", %1, 4), %%"REG_c
" \n\t" 1507 "add %1, %%"REG_c
" \n\t" 1508 #if TEMPLATE_PP_SSE2 1509 "pxor %%xmm7, %%xmm7 \n\t" 1510 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 1511 "movq " #a ", %%xmm0 \n\t"\ 1512 "movq " #b ", %%xmm1 \n\t"\ 1513 "movq " #d ", %%xmm2 \n\t"\ 1514 "movq " #e ", %%xmm3 \n\t"\ 1515 "pavgb %%xmm2, %%xmm1 \n\t"\ 1516 "pavgb %%xmm3, %%xmm0 \n\t"\ 1517 "punpcklbw %%xmm7, %%xmm0 \n\t"\ 1518 "punpcklbw %%xmm7, %%xmm1 \n\t"\ 1519 "psubw %%xmm1, %%xmm0 \n\t"\ 1520 "psraw $3, %%xmm0 \n\t"\ 1521 "psubw %%xmm0, %%xmm1 \n\t"\ 1522 "packuswb %%xmm1, %%xmm1 \n\t"\ 1523 "movlps %%xmm1, " #c " \n\t" 1524 #else //TEMPLATE_PP_SSE2 1525 "pxor %%mm7, %%mm7 \n\t" 1529 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 1530 "movq " #a ", %%mm0 \n\t"\ 1531 "movq " #b ", %%mm1 \n\t"\ 1532 "movq " #d ", %%mm2 \n\t"\ 1533 "movq " #e ", %%mm3 \n\t"\ 1534 PAVGB(%%mm2, %%mm1) \ 1535 PAVGB(%%mm3, %%mm0) \ 1536 "movq %%mm0, %%mm2 \n\t"\ 1537 "punpcklbw %%mm7, %%mm0 \n\t"\ 1538 "punpckhbw %%mm7, %%mm2 \n\t"\ 1539 "movq %%mm1, %%mm3 \n\t"\ 1540 "punpcklbw %%mm7, %%mm1 \n\t"\ 1541 "punpckhbw %%mm7, %%mm3 \n\t"\ 1542 "psubw %%mm1, %%mm0 \n\t" \ 1543 "psubw %%mm3, %%mm2 \n\t" \ 1544 "psraw $3, %%mm0 \n\t" \ 1545 "psraw $3, %%mm2 \n\t" \ 1546 "psubw %%mm0, %%mm1 \n\t" \ 1547 "psubw %%mm2, %%mm3 \n\t" \ 1548 "packuswb %%mm3, %%mm1 \n\t"\ 1549 "movq %%mm1, " #c " \n\t" 1550 #endif //TEMPLATE_PP_SSE2 1551 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) 1553 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
1554 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
1555 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
1556 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
1561 XMM_CLOBBERS(
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm7",)
1563 "%"REG_a,
"%"REG_d,
"%"REG_c
1565 #undef REAL_DEINT_CUBIC 1566 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1571 src[stride*5] =
CLIP((-
src[stride*2] + 9*
src[stride*4] + 9*
src[stride*6] -
src[stride*8])>>4);
1572 src[stride*7] =
CLIP((-
src[stride*4] + 9*
src[stride*6] + 9*
src[stride*8] -
src[stride*10])>>4);
1573 src[stride*9] =
CLIP((-
src[stride*6] + 9*
src[stride*8] + 9*
src[stride*10] -
src[stride*12])>>4);
1576 #endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1588 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1591 "lea (%0, %1), %%"REG_a
" \n\t" 1592 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1593 "pxor %%mm7, %%mm7 \n\t" 1594 "movq (%2), %%mm0 \n\t" 1598 #define REAL_DEINT_FF(a,b,c,d)\ 1599 "movq " #a ", %%mm1 \n\t"\ 1600 "movq " #b ", %%mm2 \n\t"\ 1601 "movq " #c ", %%mm3 \n\t"\ 1602 "movq " #d ", %%mm4 \n\t"\ 1603 PAVGB(%%mm3, %%mm1) \ 1604 PAVGB(%%mm4, %%mm0) \ 1605 "movq %%mm0, %%mm3 \n\t"\ 1606 "punpcklbw %%mm7, %%mm0 \n\t"\ 1607 "punpckhbw %%mm7, %%mm3 \n\t"\ 1608 "movq %%mm1, %%mm4 \n\t"\ 1609 "punpcklbw %%mm7, %%mm1 \n\t"\ 1610 "punpckhbw %%mm7, %%mm4 \n\t"\ 1611 "psllw $2, %%mm1 \n\t"\ 1612 "psllw $2, %%mm4 \n\t"\ 1613 "psubw %%mm0, %%mm1 \n\t"\ 1614 "psubw %%mm3, %%mm4 \n\t"\ 1615 "movq %%mm2, %%mm5 \n\t"\ 1616 "movq %%mm2, %%mm0 \n\t"\ 1617 "punpcklbw %%mm7, %%mm2 \n\t"\ 1618 "punpckhbw %%mm7, %%mm5 \n\t"\ 1619 "paddw %%mm2, %%mm1 \n\t"\ 1620 "paddw %%mm5, %%mm4 \n\t"\ 1621 "psraw $2, %%mm1 \n\t"\ 1622 "psraw $2, %%mm4 \n\t"\ 1623 "packuswb %%mm4, %%mm1 \n\t"\ 1624 "movq %%mm1, " #b " \n\t"\ 1626 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d) 1628 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
1629 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
1630 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
1631 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
1633 "movq %%mm0, (%2) \n\t" 1635 :
"%"REG_a,
"%"REG_d
1637 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1655 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1667 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1670 "lea (%0, %1), %%"REG_a
" \n\t" 1671 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1672 "pxor %%mm7, %%mm7 \n\t" 1673 "movq (%2), %%mm0 \n\t" 1674 "movq (%3), %%mm1 \n\t" 1678 #define REAL_DEINT_L5(t1,t2,a,b,c)\ 1679 "movq " #a ", %%mm2 \n\t"\ 1680 "movq " #b ", %%mm3 \n\t"\ 1681 "movq " #c ", %%mm4 \n\t"\ 1684 "movq %%mm2, %%mm5 \n\t"\ 1685 "movq %%mm2, " #t1 " \n\t"\ 1686 "punpcklbw %%mm7, %%mm2 \n\t"\ 1687 "punpckhbw %%mm7, %%mm5 \n\t"\ 1688 "movq %%mm2, %%mm6 \n\t"\ 1689 "paddw %%mm2, %%mm2 \n\t"\ 1690 "paddw %%mm6, %%mm2 \n\t"\ 1691 "movq %%mm5, %%mm6 \n\t"\ 1692 "paddw %%mm5, %%mm5 \n\t"\ 1693 "paddw %%mm6, %%mm5 \n\t"\ 1694 "movq %%mm3, %%mm6 \n\t"\ 1695 "punpcklbw %%mm7, %%mm3 \n\t"\ 1696 "punpckhbw %%mm7, %%mm6 \n\t"\ 1697 "paddw %%mm3, %%mm3 \n\t"\ 1698 "paddw %%mm6, %%mm6 \n\t"\ 1699 "paddw %%mm3, %%mm2 \n\t"\ 1700 "paddw %%mm6, %%mm5 \n\t"\ 1701 "movq %%mm4, %%mm6 \n\t"\ 1702 "punpcklbw %%mm7, %%mm4 \n\t"\ 1703 "punpckhbw %%mm7, %%mm6 \n\t"\ 1704 "psubw %%mm4, %%mm2 \n\t"\ 1705 "psubw %%mm6, %%mm5 \n\t"\ 1706 "psraw $2, %%mm2 \n\t"\ 1707 "psraw $2, %%mm5 \n\t"\ 1708 "packuswb %%mm5, %%mm2 \n\t"\ 1709 "movq %%mm2, " #a " \n\t"\ 1711 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c) 1713 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
1714 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
1715 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
1716 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
1717 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
1718 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
1719 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
1720 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
1722 "movq %%mm0, (%2) \n\t" 1723 "movq %%mm1, (%3) \n\t" 1725 :
"%"REG_a,
"%"REG_d
1727 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1737 src[stride*1]=
CLIP((-(t2 +
src[stride*3]) + 2*(t3 +
src[stride*2]) + 6*t1 + 4)>>3);
1739 src[stride*2]=
CLIP((-(t3 +
src[stride*4]) + 2*(t1 +
src[stride*3]) + 6*t2 + 4)>>3);
1741 src[stride*3]=
CLIP((-(t1 +
src[stride*5]) + 2*(t2 +
src[stride*4]) + 6*t3 + 4)>>3);
1743 src[stride*4]=
CLIP((-(t2 +
src[stride*6]) + 2*(t3 +
src[stride*5]) + 6*t1 + 4)>>3);
1745 src[stride*5]=
CLIP((-(t3 +
src[stride*7]) + 2*(t1 +
src[stride*6]) + 6*t2 + 4)>>3);
1747 src[stride*6]=
CLIP((-(t1 +
src[stride*8]) + 2*(t2 +
src[stride*7]) + 6*t3 + 4)>>3);
1749 src[stride*7]=
CLIP((-(t2 +
src[stride*9]) + 2*(t3 +
src[stride*8]) + 6*t1 + 4)>>3);
1756 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1768 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1771 "lea (%0, %1), %%"REG_a
" \n\t" 1772 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1776 "movq (%2), %%mm0 \n\t" 1777 "movq (%%"REG_a
"), %%mm1 \n\t" 1779 "movq (%0), %%mm2 \n\t" 1781 "movq %%mm0, (%0) \n\t" 1782 "movq (%%"REG_a
", %1), %%mm0 \n\t" 1785 "movq %%mm2, (%%"REG_a
") \n\t" 1786 "movq (%%"REG_a
", %1, 2), %%mm2 \n\t" 1789 "movq %%mm1, (%%"REG_a
", %1) \n\t" 1790 "movq (%0, %1, 4), %%mm1 \n\t" 1793 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t" 1794 "movq (%%"REG_d
"), %%mm0 \n\t" 1797 "movq %%mm2, (%0, %1, 4) \n\t" 1798 "movq (%%"REG_d
", %1), %%mm2 \n\t" 1801 "movq %%mm1, (%%"REG_d
") \n\t" 1802 "movq (%%"REG_d
", %1, 2), %%mm1 \n\t" 1805 "movq %%mm0, (%%"REG_d
", %1) \n\t" 1806 "movq (%0, %1, 8), %%mm0 \n\t" 1809 "movq %%mm2, (%%"REG_d
", %1, 2) \n\t" 1810 "movq %%mm1, (%2) \n\t" 1813 :
"%"REG_a,
"%"REG_d
1815 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1820 a= *(uint32_t*)&tmp[
stride*0];
1823 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1824 *(uint32_t*)&
src[
stride*0]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1827 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1828 *(uint32_t*)&
src[
stride*1]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1831 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1832 *(uint32_t*)&
src[
stride*2]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1835 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1836 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1839 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1840 *(uint32_t*)&
src[
stride*4]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1843 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1844 *(uint32_t*)&
src[
stride*5]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1847 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1848 *(uint32_t*)&
src[
stride*6]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1851 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1852 *(uint32_t*)&
src[
stride*7]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1854 *(uint32_t*)&tmp[
stride*0]= c;
1858 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1871 #if TEMPLATE_PP_MMXEXT 1873 "lea (%0, %1), %%"REG_a
" \n\t" 1874 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1878 "movq (%0), %%mm0 \n\t" 1879 "movq (%%"REG_a
", %1), %%mm2 \n\t" 1880 "movq (%%"REG_a
"), %%mm1 \n\t" 1881 "movq %%mm0, %%mm3 \n\t" 1882 "pmaxub %%mm1, %%mm0 \n\t" 1883 "pminub %%mm3, %%mm1 \n\t" 1884 "pmaxub %%mm2, %%mm1 \n\t" 1885 "pminub %%mm1, %%mm0 \n\t" 1886 "movq %%mm0, (%%"REG_a
") \n\t" 1888 "movq (%0, %1, 4), %%mm0 \n\t" 1889 "movq (%%"REG_a
", %1, 2), %%mm1 \n\t" 1890 "movq %%mm2, %%mm3 \n\t" 1891 "pmaxub %%mm1, %%mm2 \n\t" 1892 "pminub %%mm3, %%mm1 \n\t" 1893 "pmaxub %%mm0, %%mm1 \n\t" 1894 "pminub %%mm1, %%mm2 \n\t" 1895 "movq %%mm2, (%%"REG_a
", %1, 2) \n\t" 1897 "movq (%%"REG_d
"), %%mm2 \n\t" 1898 "movq (%%"REG_d
", %1), %%mm1 \n\t" 1899 "movq %%mm2, %%mm3 \n\t" 1900 "pmaxub %%mm0, %%mm2 \n\t" 1901 "pminub %%mm3, %%mm0 \n\t" 1902 "pmaxub %%mm1, %%mm0 \n\t" 1903 "pminub %%mm0, %%mm2 \n\t" 1904 "movq %%mm2, (%%"REG_d
") \n\t" 1906 "movq (%%"REG_d
", %1, 2), %%mm2 \n\t" 1907 "movq (%0, %1, 8), %%mm0 \n\t" 1908 "movq %%mm2, %%mm3 \n\t" 1909 "pmaxub %%mm0, %%mm2 \n\t" 1910 "pminub %%mm3, %%mm0 \n\t" 1911 "pmaxub %%mm1, %%mm0 \n\t" 1912 "pminub %%mm0, %%mm2 \n\t" 1913 "movq %%mm2, (%%"REG_d
", %1, 2) \n\t" 1917 :
"%"REG_a,
"%"REG_d
1920 #else // MMX without MMX2 1922 "lea (%0, %1), %%"REG_a
" \n\t" 1923 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t" 1926 "pxor %%mm7, %%mm7 \n\t" 1928 #define REAL_MEDIAN(a,b,c)\ 1929 "movq " #a ", %%mm0 \n\t"\ 1930 "movq " #b ", %%mm2 \n\t"\ 1931 "movq " #c ", %%mm1 \n\t"\ 1932 "movq %%mm0, %%mm3 \n\t"\ 1933 "movq %%mm1, %%mm4 \n\t"\ 1934 "movq %%mm2, %%mm5 \n\t"\ 1935 "psubusb %%mm1, %%mm3 \n\t"\ 1936 "psubusb %%mm2, %%mm4 \n\t"\ 1937 "psubusb %%mm0, %%mm5 \n\t"\ 1938 "pcmpeqb %%mm7, %%mm3 \n\t"\ 1939 "pcmpeqb %%mm7, %%mm4 \n\t"\ 1940 "pcmpeqb %%mm7, %%mm5 \n\t"\ 1941 "movq %%mm3, %%mm6 \n\t"\ 1942 "pxor %%mm4, %%mm3 \n\t"\ 1943 "pxor %%mm5, %%mm4 \n\t"\ 1944 "pxor %%mm6, %%mm5 \n\t"\ 1945 "por %%mm3, %%mm1 \n\t"\ 1946 "por %%mm4, %%mm2 \n\t"\ 1947 "por %%mm5, %%mm0 \n\t"\ 1948 "pand %%mm2, %%mm0 \n\t"\ 1949 "pand %%mm1, %%mm0 \n\t"\ 1950 "movq %%mm0, " #b " \n\t" 1951 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) 1953 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
1954 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
1955 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
1956 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
1959 :
"%"REG_a,
"%"REG_d
1961 #endif //TEMPLATE_PP_MMXEXT 1962 #else //TEMPLATE_PP_MMX 1968 for (y=0; y<4; y++){
1969 int a,
b,
c,
d, e,
f;
1976 colsrc[
stride ] = (a|(d^
f)) & (b|(d^e)) & (c|(e^
f));
1981 #endif //TEMPLATE_PP_MMX 1991 "lea (%0, %1), %%"REG_a
" \n\t" 1994 "movq (%0), %%mm0 \n\t" 1995 "movq (%%"REG_a
"), %%mm1 \n\t" 1996 "movq %%mm0, %%mm2 \n\t" 1997 "punpcklbw %%mm1, %%mm0 \n\t" 1998 "punpckhbw %%mm1, %%mm2 \n\t" 2000 "movq (%%"REG_a
", %1), %%mm1 \n\t" 2001 "movq (%%"REG_a
", %1, 2), %%mm3 \n\t" 2002 "movq %%mm1, %%mm4 \n\t" 2003 "punpcklbw %%mm3, %%mm1 \n\t" 2004 "punpckhbw %%mm3, %%mm4 \n\t" 2006 "movq %%mm0, %%mm3 \n\t" 2007 "punpcklwd %%mm1, %%mm0 \n\t" 2008 "punpckhwd %%mm1, %%mm3 \n\t" 2009 "movq %%mm2, %%mm1 \n\t" 2010 "punpcklwd %%mm4, %%mm2 \n\t" 2011 "punpckhwd %%mm4, %%mm1 \n\t" 2013 "movd %%mm0, 128(%2) \n\t" 2014 "psrlq $32, %%mm0 \n\t" 2015 "movd %%mm0, 144(%2) \n\t" 2016 "movd %%mm3, 160(%2) \n\t" 2017 "psrlq $32, %%mm3 \n\t" 2018 "movd %%mm3, 176(%2) \n\t" 2019 "movd %%mm3, 48(%3) \n\t" 2020 "movd %%mm2, 192(%2) \n\t" 2021 "movd %%mm2, 64(%3) \n\t" 2022 "psrlq $32, %%mm2 \n\t" 2023 "movd %%mm2, 80(%3) \n\t" 2024 "movd %%mm1, 96(%3) \n\t" 2025 "psrlq $32, %%mm1 \n\t" 2026 "movd %%mm1, 112(%3) \n\t" 2028 "lea (%%"REG_a
", %1, 4), %%"REG_a
" \n\t" 2030 "movq (%0, %1, 4), %%mm0 \n\t" 2031 "movq (%%"REG_a
"), %%mm1 \n\t" 2032 "movq %%mm0, %%mm2 \n\t" 2033 "punpcklbw %%mm1, %%mm0 \n\t" 2034 "punpckhbw %%mm1, %%mm2 \n\t" 2036 "movq (%%"REG_a
", %1), %%mm1 \n\t" 2037 "movq (%%"REG_a
", %1, 2), %%mm3 \n\t" 2038 "movq %%mm1, %%mm4 \n\t" 2039 "punpcklbw %%mm3, %%mm1 \n\t" 2040 "punpckhbw %%mm3, %%mm4 \n\t" 2042 "movq %%mm0, %%mm3 \n\t" 2043 "punpcklwd %%mm1, %%mm0 \n\t" 2044 "punpckhwd %%mm1, %%mm3 \n\t" 2045 "movq %%mm2, %%mm1 \n\t" 2046 "punpcklwd %%mm4, %%mm2 \n\t" 2047 "punpckhwd %%mm4, %%mm1 \n\t" 2049 "movd %%mm0, 132(%2) \n\t" 2050 "psrlq $32, %%mm0 \n\t" 2051 "movd %%mm0, 148(%2) \n\t" 2052 "movd %%mm3, 164(%2) \n\t" 2053 "psrlq $32, %%mm3 \n\t" 2054 "movd %%mm3, 180(%2) \n\t" 2055 "movd %%mm3, 52(%3) \n\t" 2056 "movd %%mm2, 196(%2) \n\t" 2057 "movd %%mm2, 68(%3) \n\t" 2058 "psrlq $32, %%mm2 \n\t" 2059 "movd %%mm2, 84(%3) \n\t" 2060 "movd %%mm1, 100(%3) \n\t" 2061 "psrlq $32, %%mm1 \n\t" 2062 "movd %%mm1, 116(%3) \n\t" 2065 ::
"r" (
src),
"r" ((
x86_reg)srcStride),
"r" (dst1),
"r" (dst2)
2076 "lea (%0, %1), %%"REG_a
" \n\t" 2077 "lea (%%"REG_a
",%1,4), %%"REG_d
" \n\t" 2080 "movq (%2), %%mm0 \n\t" 2081 "movq 16(%2), %%mm1 \n\t" 2082 "movq %%mm0, %%mm2 \n\t" 2083 "punpcklbw %%mm1, %%mm0 \n\t" 2084 "punpckhbw %%mm1, %%mm2 \n\t" 2086 "movq 32(%2), %%mm1 \n\t" 2087 "movq 48(%2), %%mm3 \n\t" 2088 "movq %%mm1, %%mm4 \n\t" 2089 "punpcklbw %%mm3, %%mm1 \n\t" 2090 "punpckhbw %%mm3, %%mm4 \n\t" 2092 "movq %%mm0, %%mm3 \n\t" 2093 "punpcklwd %%mm1, %%mm0 \n\t" 2094 "punpckhwd %%mm1, %%mm3 \n\t" 2095 "movq %%mm2, %%mm1 \n\t" 2096 "punpcklwd %%mm4, %%mm2 \n\t" 2097 "punpckhwd %%mm4, %%mm1 \n\t" 2099 "movd %%mm0, (%0) \n\t" 2100 "psrlq $32, %%mm0 \n\t" 2101 "movd %%mm0, (%%"REG_a
") \n\t" 2102 "movd %%mm3, (%%"REG_a
", %1) \n\t" 2103 "psrlq $32, %%mm3 \n\t" 2104 "movd %%mm3, (%%"REG_a
", %1, 2) \n\t" 2105 "movd %%mm2, (%0, %1, 4) \n\t" 2106 "psrlq $32, %%mm2 \n\t" 2107 "movd %%mm2, (%%"REG_d
") \n\t" 2108 "movd %%mm1, (%%"REG_d
", %1) \n\t" 2109 "psrlq $32, %%mm1 \n\t" 2110 "movd %%mm1, (%%"REG_d
", %1, 2) \n\t" 2113 "movq 64(%2), %%mm0 \n\t" 2114 "movq 80(%2), %%mm1 \n\t" 2115 "movq %%mm0, %%mm2 \n\t" 2116 "punpcklbw %%mm1, %%mm0 \n\t" 2117 "punpckhbw %%mm1, %%mm2 \n\t" 2119 "movq 96(%2), %%mm1 \n\t" 2120 "movq 112(%2), %%mm3 \n\t" 2121 "movq %%mm1, %%mm4 \n\t" 2122 "punpcklbw %%mm3, %%mm1 \n\t" 2123 "punpckhbw %%mm3, %%mm4 \n\t" 2125 "movq %%mm0, %%mm3 \n\t" 2126 "punpcklwd %%mm1, %%mm0 \n\t" 2127 "punpckhwd %%mm1, %%mm3 \n\t" 2128 "movq %%mm2, %%mm1 \n\t" 2129 "punpcklwd %%mm4, %%mm2 \n\t" 2130 "punpckhwd %%mm4, %%mm1 \n\t" 2132 "movd %%mm0, 4(%0) \n\t" 2133 "psrlq $32, %%mm0 \n\t" 2134 "movd %%mm0, 4(%%"REG_a
") \n\t" 2135 "movd %%mm3, 4(%%"REG_a
", %1) \n\t" 2136 "psrlq $32, %%mm3 \n\t" 2137 "movd %%mm3, 4(%%"REG_a
", %1, 2) \n\t" 2138 "movd %%mm2, 4(%0, %1, 4) \n\t" 2139 "psrlq $32, %%mm2 \n\t" 2140 "movd %%mm2, 4(%%"REG_d
") \n\t" 2141 "movd %%mm1, 4(%%"REG_d
", %1) \n\t" 2142 "psrlq $32, %%mm1 \n\t" 2143 "movd %%mm1, 4(%%"REG_d
", %1, 2) \n\t" 2146 :
"%"REG_a,
"%"REG_d
2149 #endif //TEMPLATE_PP_MMX 2152 #if !TEMPLATE_PP_ALTIVEC 2154 uint8_t *tempBlurred, uint32_t *tempBlurredPast,
int *maxNoise)
2157 tempBlurredPast[127]= maxNoise[0];
2158 tempBlurredPast[128]= maxNoise[1];
2159 tempBlurredPast[129]= maxNoise[2];
2161 #define FAST_L2_DIFF 2163 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 2165 "lea (%2, %2, 2), %%"REG_a
" \n\t" 2166 "lea (%2, %2, 4), %%"REG_d
" \n\t" 2167 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t" 2171 #ifdef L1_DIFF //needs mmx2 2172 "movq (%0), %%mm0 \n\t" 2173 "psadbw (%1), %%mm0 \n\t" 2174 "movq (%0, %2), %%mm1 \n\t" 2175 "psadbw (%1, %2), %%mm1 \n\t" 2176 "movq (%0, %2, 2), %%mm2 \n\t" 2177 "psadbw (%1, %2, 2), %%mm2 \n\t" 2178 "movq (%0, %%"REG_a
"), %%mm3 \n\t" 2179 "psadbw (%1, %%"REG_a
"), %%mm3 \n\t" 2181 "movq (%0, %2, 4), %%mm4 \n\t" 2182 "paddw %%mm1, %%mm0 \n\t" 2183 "psadbw (%1, %2, 4), %%mm4 \n\t" 2184 "movq (%0, %%"REG_d
"), %%mm5 \n\t" 2185 "paddw %%mm2, %%mm0 \n\t" 2186 "psadbw (%1, %%"REG_d
"), %%mm5 \n\t" 2187 "movq (%0, %%"REG_a
", 2), %%mm6 \n\t" 2188 "paddw %%mm3, %%mm0 \n\t" 2189 "psadbw (%1, %%"REG_a
", 2), %%mm6 \n\t" 2190 "movq (%0, %%"REG_c
"), %%mm7 \n\t" 2191 "paddw %%mm4, %%mm0 \n\t" 2192 "psadbw (%1, %%"REG_c
"), %%mm7 \n\t" 2193 "paddw %%mm5, %%mm6 \n\t" 2194 "paddw %%mm7, %%mm6 \n\t" 2195 "paddw %%mm6, %%mm0 \n\t" 2197 #if defined (FAST_L2_DIFF) 2198 "pcmpeqb %%mm7, %%mm7 \n\t" 2199 "movq "MANGLE(b80)
", %%mm6 \n\t" 2200 "pxor %%mm0, %%mm0 \n\t" 2201 #define REAL_L2_DIFF_CORE(a, b)\ 2202 "movq " #a ", %%mm5 \n\t"\ 2203 "movq " #b ", %%mm2 \n\t"\ 2204 "pxor %%mm7, %%mm2 \n\t"\ 2205 PAVGB(%%mm2, %%mm5)\ 2206 "paddb %%mm6, %%mm5 \n\t"\ 2207 "movq %%mm5, %%mm2 \n\t"\ 2208 "psllw $8, %%mm5 \n\t"\ 2209 "pmaddwd %%mm5, %%mm5 \n\t"\ 2210 "pmaddwd %%mm2, %%mm2 \n\t"\ 2211 "paddd %%mm2, %%mm5 \n\t"\ 2212 "psrld $14, %%mm5 \n\t"\ 2213 "paddd %%mm5, %%mm0 \n\t" 2215 #else //defined (FAST_L2_DIFF) 2216 "pxor %%mm7, %%mm7 \n\t" 2217 "pxor %%mm0, %%mm0 \n\t" 2218 #define REAL_L2_DIFF_CORE(a, b)\ 2219 "movq " #a ", %%mm5 \n\t"\ 2220 "movq " #b ", %%mm2 \n\t"\ 2221 "movq %%mm5, %%mm1 \n\t"\ 2222 "movq %%mm2, %%mm3 \n\t"\ 2223 "punpcklbw %%mm7, %%mm5 \n\t"\ 2224 "punpckhbw %%mm7, %%mm1 \n\t"\ 2225 "punpcklbw %%mm7, %%mm2 \n\t"\ 2226 "punpckhbw %%mm7, %%mm3 \n\t"\ 2227 "psubw %%mm2, %%mm5 \n\t"\ 2228 "psubw %%mm3, %%mm1 \n\t"\ 2229 "pmaddwd %%mm5, %%mm5 \n\t"\ 2230 "pmaddwd %%mm1, %%mm1 \n\t"\ 2231 "paddd %%mm1, %%mm5 \n\t"\ 2232 "paddd %%mm5, %%mm0 \n\t" 2234 #endif //defined (FAST_L2_DIFF) 2236 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) 2238 L2_DIFF_CORE((%0) , (%1))
2239 L2_DIFF_CORE((%0, %2) , (%1, %2))
2240 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
2241 L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa))
2242 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
2243 L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd))
2244 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
2245 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
2249 "movq %%mm0, %%mm4 \n\t" 2250 "psrlq $32, %%mm0 \n\t" 2251 "paddd %%mm0, %%mm4 \n\t" 2252 "movd %%mm4, %%ecx \n\t" 2253 "shll $2, %%ecx \n\t" 2254 "mov %3, %%"REG_d
" \n\t" 2255 "addl -4(%%"REG_d
"), %%ecx \n\t" 2256 "addl 4(%%"REG_d
"), %%ecx \n\t" 2257 "addl -1024(%%"REG_d
"), %%ecx \n\t" 2258 "addl $4, %%ecx \n\t" 2259 "addl 1024(%%"REG_d
"), %%ecx \n\t" 2260 "shrl $3, %%ecx \n\t" 2261 "movl %%ecx, (%%"REG_d
") \n\t" 2266 "cmpl 512(%%"REG_d
"), %%ecx \n\t" 2268 "cmpl 516(%%"REG_d
"), %%ecx \n\t" 2271 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t" 2272 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t" 2273 "movq (%0), %%mm0 \n\t" 2274 "movq (%0, %2), %%mm1 \n\t" 2275 "movq (%0, %2, 2), %%mm2 \n\t" 2276 "movq (%0, %%"REG_a
"), %%mm3 \n\t" 2277 "movq (%0, %2, 4), %%mm4 \n\t" 2278 "movq (%0, %%"REG_d
"), %%mm5 \n\t" 2279 "movq (%0, %%"REG_a
", 2), %%mm6 \n\t" 2280 "movq (%0, %%"REG_c
"), %%mm7 \n\t" 2281 "movq %%mm0, (%1) \n\t" 2282 "movq %%mm1, (%1, %2) \n\t" 2283 "movq %%mm2, (%1, %2, 2) \n\t" 2284 "movq %%mm3, (%1, %%"REG_a
") \n\t" 2285 "movq %%mm4, (%1, %2, 4) \n\t" 2286 "movq %%mm5, (%1, %%"REG_d
") \n\t" 2287 "movq %%mm6, (%1, %%"REG_a
", 2) \n\t" 2288 "movq %%mm7, (%1, %%"REG_c
") \n\t" 2292 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t" 2293 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t" 2294 "movq (%0), %%mm0 \n\t" 2296 "movq (%0, %2), %%mm1 \n\t" 2297 PAVGB((%1, %2), %%mm1)
2298 "movq (%0, %2, 2), %%mm2 \n\t" 2299 PAVGB((%1, %2, 2), %%mm2)
2300 "movq (%0, %%"REG_a
"), %%mm3 \n\t" 2301 PAVGB((%1, %%REGa), %%mm3)
2302 "movq (%0, %2, 4), %%mm4 \n\t" 2303 PAVGB((%1, %2, 4), %%mm4)
2304 "movq (%0, %%"REG_d
"), %%mm5 \n\t" 2305 PAVGB((%1, %%REGd), %%mm5)
2306 "movq (%0, %%"REG_a
", 2), %%mm6 \n\t" 2307 PAVGB((%1, %%REGa, 2), %%mm6)
2308 "movq (%0, %%"REG_c
"), %%mm7 \n\t" 2309 PAVGB((%1, %%REGc), %%mm7)
2310 "movq %%mm0, (%1) \n\t" 2311 "movq %%mm1, (%1, %2) \n\t" 2312 "movq %%mm2, (%1, %2, 2) \n\t" 2313 "movq %%mm3, (%1, %%"REG_a
") \n\t" 2314 "movq %%mm4, (%1, %2, 4) \n\t" 2315 "movq %%mm5, (%1, %%"REG_d
") \n\t" 2316 "movq %%mm6, (%1, %%"REG_a
", 2) \n\t" 2317 "movq %%mm7, (%1, %%"REG_c
") \n\t" 2318 "movq %%mm0, (%0) \n\t" 2319 "movq %%mm1, (%0, %2) \n\t" 2320 "movq %%mm2, (%0, %2, 2) \n\t" 2321 "movq %%mm3, (%0, %%"REG_a
") \n\t" 2322 "movq %%mm4, (%0, %2, 4) \n\t" 2323 "movq %%mm5, (%0, %%"REG_d
") \n\t" 2324 "movq %%mm6, (%0, %%"REG_a
", 2) \n\t" 2325 "movq %%mm7, (%0, %%"REG_c
") \n\t" 2329 "cmpl 508(%%"REG_d
"), %%ecx \n\t" 2332 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t" 2333 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t" 2334 "movq (%0), %%mm0 \n\t" 2335 "movq (%0, %2), %%mm1 \n\t" 2336 "movq (%0, %2, 2), %%mm2 \n\t" 2337 "movq (%0, %%"REG_a
"), %%mm3 \n\t" 2338 "movq (%1), %%mm4 \n\t" 2339 "movq (%1, %2), %%mm5 \n\t" 2340 "movq (%1, %2, 2), %%mm6 \n\t" 2341 "movq (%1, %%"REG_a
"), %%mm7 \n\t" 2350 "movq %%mm0, (%1) \n\t" 2351 "movq %%mm1, (%1, %2) \n\t" 2352 "movq %%mm2, (%1, %2, 2) \n\t" 2353 "movq %%mm3, (%1, %%"REG_a
") \n\t" 2354 "movq %%mm0, (%0) \n\t" 2355 "movq %%mm1, (%0, %2) \n\t" 2356 "movq %%mm2, (%0, %2, 2) \n\t" 2357 "movq %%mm3, (%0, %%"REG_a
") \n\t" 2359 "movq (%0, %2, 4), %%mm0 \n\t" 2360 "movq (%0, %%"REG_d
"), %%mm1 \n\t" 2361 "movq (%0, %%"REG_a
", 2), %%mm2 \n\t" 2362 "movq (%0, %%"REG_c
"), %%mm3 \n\t" 2363 "movq (%1, %2, 4), %%mm4 \n\t" 2364 "movq (%1, %%"REG_d
"), %%mm5 \n\t" 2365 "movq (%1, %%"REG_a
", 2), %%mm6 \n\t" 2366 "movq (%1, %%"REG_c
"), %%mm7 \n\t" 2375 "movq %%mm0, (%1, %2, 4) \n\t" 2376 "movq %%mm1, (%1, %%"REG_d
") \n\t" 2377 "movq %%mm2, (%1, %%"REG_a
", 2) \n\t" 2378 "movq %%mm3, (%1, %%"REG_c
") \n\t" 2379 "movq %%mm0, (%0, %2, 4) \n\t" 2380 "movq %%mm1, (%0, %%"REG_d
") \n\t" 2381 "movq %%mm2, (%0, %%"REG_a
", 2) \n\t" 2382 "movq %%mm3, (%0, %%"REG_c
") \n\t" 2386 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t" 2387 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t" 2388 "movq (%0), %%mm0 \n\t" 2389 "movq (%0, %2), %%mm1 \n\t" 2390 "movq (%0, %2, 2), %%mm2 \n\t" 2391 "movq (%0, %%"REG_a
"), %%mm3 \n\t" 2392 "movq (%1), %%mm4 \n\t" 2393 "movq (%1, %2), %%mm5 \n\t" 2394 "movq (%1, %2, 2), %%mm6 \n\t" 2395 "movq (%1, %%"REG_a
"), %%mm7 \n\t" 2408 "movq %%mm0, (%1) \n\t" 2409 "movq %%mm1, (%1, %2) \n\t" 2410 "movq %%mm2, (%1, %2, 2) \n\t" 2411 "movq %%mm3, (%1, %%"REG_a
") \n\t" 2412 "movq %%mm0, (%0) \n\t" 2413 "movq %%mm1, (%0, %2) \n\t" 2414 "movq %%mm2, (%0, %2, 2) \n\t" 2415 "movq %%mm3, (%0, %%"REG_a
") \n\t" 2417 "movq (%0, %2, 4), %%mm0 \n\t" 2418 "movq (%0, %%"REG_d
"), %%mm1 \n\t" 2419 "movq (%0, %%"REG_a
", 2), %%mm2 \n\t" 2420 "movq (%0, %%"REG_c
"), %%mm3 \n\t" 2421 "movq (%1, %2, 4), %%mm4 \n\t" 2422 "movq (%1, %%"REG_d
"), %%mm5 \n\t" 2423 "movq (%1, %%"REG_a
", 2), %%mm6 \n\t" 2424 "movq (%1, %%"REG_c
"), %%mm7 \n\t" 2437 "movq %%mm0, (%1, %2, 4) \n\t" 2438 "movq %%mm1, (%1, %%"REG_d
") \n\t" 2439 "movq %%mm2, (%1, %%"REG_a
", 2) \n\t" 2440 "movq %%mm3, (%1, %%"REG_c
") \n\t" 2441 "movq %%mm0, (%0, %2, 4) \n\t" 2442 "movq %%mm1, (%0, %%"REG_d
") \n\t" 2443 "movq %%mm2, (%0, %%"REG_a
", 2) \n\t" 2444 "movq %%mm3, (%0, %%"REG_c
") \n\t" 2448 ::
"r" (
src),
"r" (tempBlurred),
"r"((
x86_reg)
stride),
"m" (tempBlurredPast)
2449 :
"%"REG_a,
"%"REG_d,
"%"REG_c,
"memory" 2451 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 2474 +(*(tempBlurredPast-256))
2475 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
2476 +(*(tempBlurredPast+256))
2488 if(d > maxNoise[1]){
2489 if(d < maxNoise[2]){
2495 tempBlurred[ x + y*
stride ]=
2509 if(d < maxNoise[0]){
2515 tempBlurred[ x + y*
stride ]=
2517 (ref*7 + cur + 4)>>3;
2526 tempBlurred[ x + y*
stride ]=
2528 (ref*3 + cur + 2)>>2;
2534 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 2536 #endif //TEMPLATE_PP_ALTIVEC 2543 int64_t dc_mask, eq_mask, both_masks;
2544 int64_t sums[10*8*2];
2548 "movq %0, %%mm7 \n\t" 2549 "movq %1, %%mm6 \n\t" 2550 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
2554 "lea (%2, %3), %%"REG_a
" \n\t" 2558 "movq (%2), %%mm0 \n\t" 2559 "movq (%%"REG_a
"), %%mm1 \n\t" 2560 "movq %%mm1, %%mm3 \n\t" 2561 "movq %%mm1, %%mm4 \n\t" 2562 "psubb %%mm1, %%mm0 \n\t" 2563 "paddb %%mm7, %%mm0 \n\t" 2564 "pcmpgtb %%mm6, %%mm0 \n\t" 2566 "movq (%%"REG_a
",%3), %%mm2 \n\t" 2567 PMAXUB(%%mm2, %%mm4)
2568 PMINUB(%%mm2, %%mm3, %%mm5)
2569 "psubb %%mm2, %%mm1 \n\t" 2570 "paddb %%mm7, %%mm1 \n\t" 2571 "pcmpgtb %%mm6, %%mm1 \n\t" 2572 "paddb %%mm1, %%mm0 \n\t" 2574 "movq (%%"REG_a
", %3, 2), %%mm1 \n\t" 2575 PMAXUB(%%mm1, %%mm4)
2576 PMINUB(%%mm1, %%mm3, %%mm5)
2577 "psubb %%mm1, %%mm2 \n\t" 2578 "paddb %%mm7, %%mm2 \n\t" 2579 "pcmpgtb %%mm6, %%mm2 \n\t" 2580 "paddb %%mm2, %%mm0 \n\t" 2582 "lea (%%"REG_a
", %3, 4), %%"REG_a
" \n\t" 2584 "movq (%2, %3, 4), %%mm2 \n\t" 2585 PMAXUB(%%mm2, %%mm4)
2586 PMINUB(%%mm2, %%mm3, %%mm5)
2587 "psubb %%mm2, %%mm1 \n\t" 2588 "paddb %%mm7, %%mm1 \n\t" 2589 "pcmpgtb %%mm6, %%mm1 \n\t" 2590 "paddb %%mm1, %%mm0 \n\t" 2592 "movq (%%"REG_a
"), %%mm1 \n\t" 2593 PMAXUB(%%mm1, %%mm4)
2594 PMINUB(%%mm1, %%mm3, %%mm5)
2595 "psubb %%mm1, %%mm2 \n\t" 2596 "paddb %%mm7, %%mm2 \n\t" 2597 "pcmpgtb %%mm6, %%mm2 \n\t" 2598 "paddb %%mm2, %%mm0 \n\t" 2600 "movq (%%"REG_a
", %3), %%mm2 \n\t" 2601 PMAXUB(%%mm2, %%mm4)
2602 PMINUB(%%mm2, %%mm3, %%mm5)
2603 "psubb %%mm2, %%mm1 \n\t" 2604 "paddb %%mm7, %%mm1 \n\t" 2605 "pcmpgtb %%mm6, %%mm1 \n\t" 2606 "paddb %%mm1, %%mm0 \n\t" 2608 "movq (%%"REG_a
", %3, 2), %%mm1 \n\t" 2609 PMAXUB(%%mm1, %%mm4)
2610 PMINUB(%%mm1, %%mm3, %%mm5)
2611 "psubb %%mm1, %%mm2 \n\t" 2612 "paddb %%mm7, %%mm2 \n\t" 2613 "pcmpgtb %%mm6, %%mm2 \n\t" 2614 "paddb %%mm2, %%mm0 \n\t" 2616 "movq (%2, %3, 8), %%mm2 \n\t" 2617 PMAXUB(%%mm2, %%mm4)
2618 PMINUB(%%mm2, %%mm3, %%mm5)
2619 "psubb %%mm2, %%mm1 \n\t" 2620 "paddb %%mm7, %%mm1 \n\t" 2621 "pcmpgtb %%mm6, %%mm1 \n\t" 2622 "paddb %%mm1, %%mm0 \n\t" 2624 "movq (%%"REG_a
", %3, 4), %%mm1 \n\t" 2625 "psubb %%mm1, %%mm2 \n\t" 2626 "paddb %%mm7, %%mm2 \n\t" 2627 "pcmpgtb %%mm6, %%mm2 \n\t" 2628 "paddb %%mm2, %%mm0 \n\t" 2629 "psubusb %%mm3, %%mm4 \n\t" 2631 "pxor %%mm6, %%mm6 \n\t" 2632 "movq %4, %%mm7 \n\t" 2633 "paddusb %%mm7, %%mm7 \n\t" 2634 "psubusb %%mm4, %%mm7 \n\t" 2635 "pcmpeqb %%mm6, %%mm7 \n\t" 2636 "pcmpeqb %%mm6, %%mm7 \n\t" 2637 "movq %%mm7, %1 \n\t" 2639 "movq %5, %%mm7 \n\t" 2640 "punpcklbw %%mm7, %%mm7 \n\t" 2641 "punpcklbw %%mm7, %%mm7 \n\t" 2642 "punpcklbw %%mm7, %%mm7 \n\t" 2643 "psubb %%mm0, %%mm6 \n\t" 2644 "pcmpgtb %%mm7, %%mm6 \n\t" 2645 "movq %%mm6, %0 \n\t" 2647 :
"=m" (eq_mask),
"=m" (dc_mask)
2648 :
"r" (
src),
"r" ((
x86_reg)
step),
"m" (
c->pQPb),
"m"(
c->ppMode.flatnessThreshold)
2652 both_masks = dc_mask & eq_mask;
2656 int64_t *temp_sums= sums;
2659 "movq %2, %%mm0 \n\t" 2660 "pxor %%mm4, %%mm4 \n\t" 2662 "movq (%0), %%mm6 \n\t" 2663 "movq (%0, %1), %%mm5 \n\t" 2664 "movq %%mm5, %%mm1 \n\t" 2665 "movq %%mm6, %%mm2 \n\t" 2666 "psubusb %%mm6, %%mm5 \n\t" 2667 "psubusb %%mm1, %%mm2 \n\t" 2668 "por %%mm5, %%mm2 \n\t" 2669 "psubusb %%mm2, %%mm0 \n\t" 2670 "pcmpeqb %%mm4, %%mm0 \n\t" 2672 "pxor %%mm6, %%mm1 \n\t" 2673 "pand %%mm0, %%mm1 \n\t" 2674 "pxor %%mm1, %%mm6 \n\t" 2677 "movq (%0, %1, 8), %%mm5 \n\t" 2679 "movq (%0, %1, 8), %%mm7 \n\t" 2680 "movq %%mm5, %%mm1 \n\t" 2681 "movq %%mm7, %%mm2 \n\t" 2682 "psubusb %%mm7, %%mm5 \n\t" 2683 "psubusb %%mm1, %%mm2 \n\t" 2684 "por %%mm5, %%mm2 \n\t" 2685 "movq %2, %%mm0 \n\t" 2686 "psubusb %%mm2, %%mm0 \n\t" 2687 "pcmpeqb %%mm4, %%mm0 \n\t" 2689 "pxor %%mm7, %%mm1 \n\t" 2690 "pand %%mm0, %%mm1 \n\t" 2691 "pxor %%mm1, %%mm7 \n\t" 2693 "movq %%mm6, %%mm5 \n\t" 2694 "punpckhbw %%mm4, %%mm6 \n\t" 2695 "punpcklbw %%mm4, %%mm5 \n\t" 2698 "movq %%mm5, %%mm0 \n\t" 2699 "movq %%mm6, %%mm1 \n\t" 2700 "psllw $2, %%mm0 \n\t" 2701 "psllw $2, %%mm1 \n\t" 2702 "paddw "MANGLE(w04)
", %%mm0 \n\t" 2703 "paddw "MANGLE(w04)
", %%mm1 \n\t" 2706 "movq (%0), %%mm2 \n\t"\ 2707 "movq (%0), %%mm3 \n\t"\ 2709 "punpcklbw %%mm4, %%mm2 \n\t"\ 2710 "punpckhbw %%mm4, %%mm3 \n\t"\ 2711 "paddw %%mm2, %%mm0 \n\t"\ 2712 "paddw %%mm3, %%mm1 \n\t" 2715 "movq (%0), %%mm2 \n\t"\ 2716 "movq (%0), %%mm3 \n\t"\ 2718 "punpcklbw %%mm4, %%mm2 \n\t"\ 2719 "punpckhbw %%mm4, %%mm3 \n\t"\ 2720 "psubw %%mm2, %%mm0 \n\t"\ 2721 "psubw %%mm3, %%mm1 \n\t" 2727 "movq %%mm0, (%3) \n\t" 2728 "movq %%mm1, 8(%3) \n\t" 2731 "psubw %%mm5, %%mm0 \n\t" 2732 "psubw %%mm6, %%mm1 \n\t" 2733 "movq %%mm0, 16(%3) \n\t" 2734 "movq %%mm1, 24(%3) \n\t" 2737 "psubw %%mm5, %%mm0 \n\t" 2738 "psubw %%mm6, %%mm1 \n\t" 2739 "movq %%mm0, 32(%3) \n\t" 2740 "movq %%mm1, 40(%3) \n\t" 2743 "psubw %%mm5, %%mm0 \n\t" 2744 "psubw %%mm6, %%mm1 \n\t" 2745 "movq %%mm0, 48(%3) \n\t" 2746 "movq %%mm1, 56(%3) \n\t" 2749 "psubw %%mm5, %%mm0 \n\t" 2750 "psubw %%mm6, %%mm1 \n\t" 2751 "movq %%mm0, 64(%3) \n\t" 2752 "movq %%mm1, 72(%3) \n\t" 2754 "movq %%mm7, %%mm6 \n\t" 2755 "punpckhbw %%mm4, %%mm7 \n\t" 2756 "punpcklbw %%mm4, %%mm6 \n\t" 2762 "movq %%mm0, 80(%3) \n\t" 2763 "movq %%mm1, 88(%3) \n\t" 2766 "paddw %%mm6, %%mm0 \n\t" 2767 "paddw %%mm7, %%mm1 \n\t" 2768 "movq %%mm0, 96(%3) \n\t" 2769 "movq %%mm1, 104(%3) \n\t" 2772 "paddw %%mm6, %%mm0 \n\t" 2773 "paddw %%mm7, %%mm1 \n\t" 2774 "movq %%mm0, 112(%3) \n\t" 2775 "movq %%mm1, 120(%3) \n\t" 2778 "paddw %%mm6, %%mm0 \n\t" 2779 "paddw %%mm7, %%mm1 \n\t" 2780 "movq %%mm0, 128(%3) \n\t" 2781 "movq %%mm1, 136(%3) \n\t" 2784 "paddw %%mm6, %%mm0 \n\t" 2785 "paddw %%mm7, %%mm1 \n\t" 2786 "movq %%mm0, 144(%3) \n\t" 2787 "movq %%mm1, 152(%3) \n\t" 2798 "movq %4, %%mm6 \n\t" 2799 "pcmpeqb %%mm5, %%mm5 \n\t" 2800 "pxor %%mm6, %%mm5 \n\t" 2801 "pxor %%mm7, %%mm7 \n\t" 2804 "movq (%1), %%mm0 \n\t" 2805 "movq 8(%1), %%mm1 \n\t" 2806 "paddw 32(%1), %%mm0 \n\t" 2807 "paddw 40(%1), %%mm1 \n\t" 2808 "movq (%0, %3), %%mm2 \n\t" 2809 "movq %%mm2, %%mm3 \n\t" 2810 "movq %%mm2, %%mm4 \n\t" 2811 "punpcklbw %%mm7, %%mm2 \n\t" 2812 "punpckhbw %%mm7, %%mm3 \n\t" 2813 "paddw %%mm2, %%mm0 \n\t" 2814 "paddw %%mm3, %%mm1 \n\t" 2815 "paddw %%mm2, %%mm0 \n\t" 2816 "paddw %%mm3, %%mm1 \n\t" 2817 "psrlw $4, %%mm0 \n\t" 2818 "psrlw $4, %%mm1 \n\t" 2819 "packuswb %%mm1, %%mm0 \n\t" 2820 "pand %%mm6, %%mm0 \n\t" 2821 "pand %%mm5, %%mm4 \n\t" 2822 "por %%mm4, %%mm0 \n\t" 2823 "movq %%mm0, (%0, %3) \n\t" 2828 :
"+r"(
offset),
"+r"(temp_sums)
2834 if(eq_mask != -1LL){
2838 "pxor %%mm7, %%mm7 \n\t" 2842 "movq (%0), %%mm0 \n\t" 2843 "movq %%mm0, %%mm1 \n\t" 2844 "punpcklbw %%mm7, %%mm0 \n\t" 2845 "punpckhbw %%mm7, %%mm1 \n\t" 2847 "movq (%0, %1), %%mm2 \n\t" 2848 "lea (%0, %1, 2), %%"REG_a
" \n\t" 2849 "movq %%mm2, %%mm3 \n\t" 2850 "punpcklbw %%mm7, %%mm2 \n\t" 2851 "punpckhbw %%mm7, %%mm3 \n\t" 2853 "movq (%%"REG_a
"), %%mm4 \n\t" 2854 "movq %%mm4, %%mm5 \n\t" 2855 "punpcklbw %%mm7, %%mm4 \n\t" 2856 "punpckhbw %%mm7, %%mm5 \n\t" 2858 "paddw %%mm0, %%mm0 \n\t" 2859 "paddw %%mm1, %%mm1 \n\t" 2860 "psubw %%mm4, %%mm2 \n\t" 2861 "psubw %%mm5, %%mm3 \n\t" 2862 "psubw %%mm2, %%mm0 \n\t" 2863 "psubw %%mm3, %%mm1 \n\t" 2865 "psllw $2, %%mm2 \n\t" 2866 "psllw $2, %%mm3 \n\t" 2867 "psubw %%mm2, %%mm0 \n\t" 2868 "psubw %%mm3, %%mm1 \n\t" 2870 "movq (%%"REG_a
", %1), %%mm2 \n\t" 2871 "movq %%mm2, %%mm3 \n\t" 2872 "punpcklbw %%mm7, %%mm2 \n\t" 2873 "punpckhbw %%mm7, %%mm3 \n\t" 2875 "psubw %%mm2, %%mm0 \n\t" 2876 "psubw %%mm3, %%mm1 \n\t" 2877 "psubw %%mm2, %%mm0 \n\t" 2878 "psubw %%mm3, %%mm1 \n\t" 2879 "movq %%mm0, (%4) \n\t" 2880 "movq %%mm1, 8(%4) \n\t" 2882 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t" 2883 "movq %%mm0, %%mm1 \n\t" 2884 "punpcklbw %%mm7, %%mm0 \n\t" 2885 "punpckhbw %%mm7, %%mm1 \n\t" 2887 "psubw %%mm0, %%mm2 \n\t" 2888 "psubw %%mm1, %%mm3 \n\t" 2889 "movq %%mm2, 16(%4) \n\t" 2890 "movq %%mm3, 24(%4) \n\t" 2891 "paddw %%mm4, %%mm4 \n\t" 2892 "paddw %%mm5, %%mm5 \n\t" 2893 "psubw %%mm2, %%mm4 \n\t" 2894 "psubw %%mm3, %%mm5 \n\t" 2896 "lea (%%"REG_a
", %1), %0 \n\t" 2897 "psllw $2, %%mm2 \n\t" 2898 "psllw $2, %%mm3 \n\t" 2899 "psubw %%mm2, %%mm4 \n\t" 2900 "psubw %%mm3, %%mm5 \n\t" 2902 "movq (%0, %1, 2), %%mm2 \n\t" 2903 "movq %%mm2, %%mm3 \n\t" 2904 "punpcklbw %%mm7, %%mm2 \n\t" 2905 "punpckhbw %%mm7, %%mm3 \n\t" 2906 "psubw %%mm2, %%mm4 \n\t" 2907 "psubw %%mm3, %%mm5 \n\t" 2908 "psubw %%mm2, %%mm4 \n\t" 2909 "psubw %%mm3, %%mm5 \n\t" 2911 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t" 2912 "punpcklbw %%mm7, %%mm6 \n\t" 2913 "psubw %%mm6, %%mm2 \n\t" 2914 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t" 2915 "punpckhbw %%mm7, %%mm6 \n\t" 2916 "psubw %%mm6, %%mm3 \n\t" 2918 "paddw %%mm0, %%mm0 \n\t" 2919 "paddw %%mm1, %%mm1 \n\t" 2920 "psubw %%mm2, %%mm0 \n\t" 2921 "psubw %%mm3, %%mm1 \n\t" 2923 "psllw $2, %%mm2 \n\t" 2924 "psllw $2, %%mm3 \n\t" 2925 "psubw %%mm2, %%mm0 \n\t" 2926 "psubw %%mm3, %%mm1 \n\t" 2928 "movq (%0, %1, 4), %%mm2 \n\t" 2929 "movq %%mm2, %%mm3 \n\t" 2930 "punpcklbw %%mm7, %%mm2 \n\t" 2931 "punpckhbw %%mm7, %%mm3 \n\t" 2933 "paddw %%mm2, %%mm2 \n\t" 2934 "paddw %%mm3, %%mm3 \n\t" 2935 "psubw %%mm2, %%mm0 \n\t" 2936 "psubw %%mm3, %%mm1 \n\t" 2938 "movq (%4), %%mm2 \n\t" 2939 "movq 8(%4), %%mm3 \n\t" 2941 #if TEMPLATE_PP_MMXEXT 2942 "movq %%mm7, %%mm6 \n\t" 2943 "psubw %%mm0, %%mm6 \n\t" 2944 "pmaxsw %%mm6, %%mm0 \n\t" 2945 "movq %%mm7, %%mm6 \n\t" 2946 "psubw %%mm1, %%mm6 \n\t" 2947 "pmaxsw %%mm6, %%mm1 \n\t" 2948 "movq %%mm7, %%mm6 \n\t" 2949 "psubw %%mm2, %%mm6 \n\t" 2950 "pmaxsw %%mm6, %%mm2 \n\t" 2951 "movq %%mm7, %%mm6 \n\t" 2952 "psubw %%mm3, %%mm6 \n\t" 2953 "pmaxsw %%mm6, %%mm3 \n\t" 2955 "movq %%mm7, %%mm6 \n\t" 2956 "pcmpgtw %%mm0, %%mm6 \n\t" 2957 "pxor %%mm6, %%mm0 \n\t" 2958 "psubw %%mm6, %%mm0 \n\t" 2959 "movq %%mm7, %%mm6 \n\t" 2960 "pcmpgtw %%mm1, %%mm6 \n\t" 2961 "pxor %%mm6, %%mm1 \n\t" 2962 "psubw %%mm6, %%mm1 \n\t" 2963 "movq %%mm7, %%mm6 \n\t" 2964 "pcmpgtw %%mm2, %%mm6 \n\t" 2965 "pxor %%mm6, %%mm2 \n\t" 2966 "psubw %%mm6, %%mm2 \n\t" 2967 "movq %%mm7, %%mm6 \n\t" 2968 "pcmpgtw %%mm3, %%mm6 \n\t" 2969 "pxor %%mm6, %%mm3 \n\t" 2970 "psubw %%mm6, %%mm3 \n\t" 2973 #if TEMPLATE_PP_MMXEXT 2974 "pminsw %%mm2, %%mm0 \n\t" 2975 "pminsw %%mm3, %%mm1 \n\t" 2977 "movq %%mm0, %%mm6 \n\t" 2978 "psubusw %%mm2, %%mm6 \n\t" 2979 "psubw %%mm6, %%mm0 \n\t" 2980 "movq %%mm1, %%mm6 \n\t" 2981 "psubusw %%mm3, %%mm6 \n\t" 2982 "psubw %%mm6, %%mm1 \n\t" 2985 "movd %2, %%mm2 \n\t" 2986 "punpcklbw %%mm7, %%mm2 \n\t" 2988 "movq %%mm7, %%mm6 \n\t" 2989 "pcmpgtw %%mm4, %%mm6 \n\t" 2990 "pxor %%mm6, %%mm4 \n\t" 2991 "psubw %%mm6, %%mm4 \n\t" 2992 "pcmpgtw %%mm5, %%mm7 \n\t" 2993 "pxor %%mm7, %%mm5 \n\t" 2994 "psubw %%mm7, %%mm5 \n\t" 2996 "psllw $3, %%mm2 \n\t" 2997 "movq %%mm2, %%mm3 \n\t" 2998 "pcmpgtw %%mm4, %%mm2 \n\t" 2999 "pcmpgtw %%mm5, %%mm3 \n\t" 3000 "pand %%mm2, %%mm4 \n\t" 3001 "pand %%mm3, %%mm5 \n\t" 3004 "psubusw %%mm0, %%mm4 \n\t" 3005 "psubusw %%mm1, %%mm5 \n\t" 3008 "movq "MANGLE(w05)
", %%mm2 \n\t" 3009 "pmullw %%mm2, %%mm4 \n\t" 3010 "pmullw %%mm2, %%mm5 \n\t" 3011 "movq "MANGLE(w20)
", %%mm2 \n\t" 3012 "paddw %%mm2, %%mm4 \n\t" 3013 "paddw %%mm2, %%mm5 \n\t" 3014 "psrlw $6, %%mm4 \n\t" 3015 "psrlw $6, %%mm5 \n\t" 3017 "movq 16(%4), %%mm0 \n\t" 3018 "movq 24(%4), %%mm1 \n\t" 3020 "pxor %%mm2, %%mm2 \n\t" 3021 "pxor %%mm3, %%mm3 \n\t" 3023 "pcmpgtw %%mm0, %%mm2 \n\t" 3024 "pcmpgtw %%mm1, %%mm3 \n\t" 3025 "pxor %%mm2, %%mm0 \n\t" 3026 "pxor %%mm3, %%mm1 \n\t" 3027 "psubw %%mm2, %%mm0 \n\t" 3028 "psubw %%mm3, %%mm1 \n\t" 3029 "psrlw $1, %%mm0 \n\t" 3030 "psrlw $1, %%mm1 \n\t" 3032 "pxor %%mm6, %%mm2 \n\t" 3033 "pxor %%mm7, %%mm3 \n\t" 3034 "pand %%mm2, %%mm4 \n\t" 3035 "pand %%mm3, %%mm5 \n\t" 3037 #if TEMPLATE_PP_MMXEXT 3038 "pminsw %%mm0, %%mm4 \n\t" 3039 "pminsw %%mm1, %%mm5 \n\t" 3041 "movq %%mm4, %%mm2 \n\t" 3042 "psubusw %%mm0, %%mm2 \n\t" 3043 "psubw %%mm2, %%mm4 \n\t" 3044 "movq %%mm5, %%mm2 \n\t" 3045 "psubusw %%mm1, %%mm2 \n\t" 3046 "psubw %%mm2, %%mm5 \n\t" 3048 "pxor %%mm6, %%mm4 \n\t" 3049 "pxor %%mm7, %%mm5 \n\t" 3050 "psubw %%mm6, %%mm4 \n\t" 3051 "psubw %%mm7, %%mm5 \n\t" 3052 "packsswb %%mm5, %%mm4 \n\t" 3053 "movq %3, %%mm1 \n\t" 3054 "pandn %%mm4, %%mm1 \n\t" 3055 "movq (%0), %%mm0 \n\t" 3056 "paddb %%mm1, %%mm0 \n\t" 3057 "movq %%mm0, (%0) \n\t" 3058 "movq (%0, %1), %%mm0 \n\t" 3059 "psubb %%mm1, %%mm0 \n\t" 3060 "movq %%mm0, (%0, %1) \n\t" 3063 :
"r" ((
x86_reg)
step),
"m" (
c->pQPb),
"m"(eq_mask),
"r"(tmp)
3074 #endif //TEMPLATE_PP_MMX 3083 #undef REAL_SCALED_CPY 3087 int levelFix, int64_t *packedOffsetAndScale)
3089 #if !TEMPLATE_PP_MMX 3095 "movq (%%"REG_a
"), %%mm2 \n\t" 3096 "movq 8(%%"REG_a
"), %%mm3 \n\t" 3097 "lea (%2,%4), %%"REG_a
" \n\t" 3098 "lea (%3,%5), %%"REG_d
" \n\t" 3099 "pxor %%mm4, %%mm4 \n\t" 3100 #if TEMPLATE_PP_MMXEXT 3101 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3102 "movq " #src1 ", %%mm0 \n\t"\ 3103 "movq " #src1 ", %%mm5 \n\t"\ 3104 "movq " #src2 ", %%mm1 \n\t"\ 3105 "movq " #src2 ", %%mm6 \n\t"\ 3106 "punpcklbw %%mm0, %%mm0 \n\t"\ 3107 "punpckhbw %%mm5, %%mm5 \n\t"\ 3108 "punpcklbw %%mm1, %%mm1 \n\t"\ 3109 "punpckhbw %%mm6, %%mm6 \n\t"\ 3110 "pmulhuw %%mm3, %%mm0 \n\t"\ 3111 "pmulhuw %%mm3, %%mm5 \n\t"\ 3112 "pmulhuw %%mm3, %%mm1 \n\t"\ 3113 "pmulhuw %%mm3, %%mm6 \n\t"\ 3114 "psubw %%mm2, %%mm0 \n\t"\ 3115 "psubw %%mm2, %%mm5 \n\t"\ 3116 "psubw %%mm2, %%mm1 \n\t"\ 3117 "psubw %%mm2, %%mm6 \n\t"\ 3118 "packuswb %%mm5, %%mm0 \n\t"\ 3119 "packuswb %%mm6, %%mm1 \n\t"\ 3120 "movq %%mm0, " #dst1 " \n\t"\ 3121 "movq %%mm1, " #dst2 " \n\t"\ 3123 #else //TEMPLATE_PP_MMXEXT 3124 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3125 "movq " #src1 ", %%mm0 \n\t"\ 3126 "movq " #src1 ", %%mm5 \n\t"\ 3127 "punpcklbw %%mm4, %%mm0 \n\t"\ 3128 "punpckhbw %%mm4, %%mm5 \n\t"\ 3129 "psubw %%mm2, %%mm0 \n\t"\ 3130 "psubw %%mm2, %%mm5 \n\t"\ 3131 "movq " #src2 ", %%mm1 \n\t"\ 3132 "psllw $6, %%mm0 \n\t"\ 3133 "psllw $6, %%mm5 \n\t"\ 3134 "pmulhw %%mm3, %%mm0 \n\t"\ 3135 "movq " #src2 ", %%mm6 \n\t"\ 3136 "pmulhw %%mm3, %%mm5 \n\t"\ 3137 "punpcklbw %%mm4, %%mm1 \n\t"\ 3138 "punpckhbw %%mm4, %%mm6 \n\t"\ 3139 "psubw %%mm2, %%mm1 \n\t"\ 3140 "psubw %%mm2, %%mm6 \n\t"\ 3141 "psllw $6, %%mm1 \n\t"\ 3142 "psllw $6, %%mm6 \n\t"\ 3143 "pmulhw %%mm3, %%mm1 \n\t"\ 3144 "pmulhw %%mm3, %%mm6 \n\t"\ 3145 "packuswb %%mm5, %%mm0 \n\t"\ 3146 "packuswb %%mm6, %%mm1 \n\t"\ 3147 "movq %%mm0, " #dst1 " \n\t"\ 3148 "movq %%mm1, " #dst2 " \n\t"\ 3150 #endif //TEMPLATE_PP_MMXEXT 3151 #define SCALED_CPY(src1, src2, dst1, dst2)\ 3152 REAL_SCALED_CPY(src1, src2, dst1, dst2) 3154 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
3155 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
3156 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
3157 "lea (%%"REG_a
",%4,4), %%"REG_a
" \n\t" 3158 "lea (%%"REG_d
",%5,4), %%"REG_d
" \n\t" 3159 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
3162 :
"=&a" (packedOffsetAndScale)
3163 :
"0" (packedOffsetAndScale),
3170 #else //TEMPLATE_PP_MMX 3172 memcpy( &(
dst[dstStride*i]),
3174 #endif //TEMPLATE_PP_MMX 3178 "lea (%0,%2), %%"REG_a
" \n\t" 3179 "lea (%1,%3), %%"REG_d
" \n\t" 3181 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ 3182 "movq " #src1 ", %%mm0 \n\t"\ 3183 "movq " #src2 ", %%mm1 \n\t"\ 3184 "movq %%mm0, " #dst1 " \n\t"\ 3185 "movq %%mm1, " #dst2 " \n\t"\ 3187 #define SIMPLE_CPY(src1, src2, dst1, dst2)\ 3188 REAL_SIMPLE_CPY(src1, src2, dst1, dst2) 3190 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
3191 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
3192 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
3193 "lea (%%"REG_a
",%2,4), %%"REG_a
" \n\t" 3194 "lea (%%"REG_d
",%3,4), %%"REG_d
" \n\t" 3195 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
3201 :
"%"REG_a,
"%"REG_d
3203 #else //TEMPLATE_PP_MMX 3205 memcpy( &(
dst[dstStride*i]),
3207 #endif //TEMPLATE_PP_MMX 3218 "movq (%0), %%mm0 \n\t" 3219 "movq %%mm0, (%0, %1, 4) \n\t" 3221 "movq %%mm0, (%0) \n\t" 3222 "movq %%mm0, (%0, %1) \n\t" 3223 "movq %%mm0, (%0, %1, 2) \n\t" 3224 "movq %%mm0, (%0, %1, 4) \n\t" 3246 #ifdef TEMPLATE_PP_TIME_MODE 3247 const int mode= TEMPLATE_PP_TIME_MODE;
3249 const int mode= isColor ?
c.ppMode.chromMode :
c.ppMode.lumMode;
3251 int black=0, white=255;
3252 int QPCorrecture= 256*256;
3259 const int qpHShift= isColor ? 4-
c.hChromaSubSample : 4;
3260 const int qpVShift= isColor ? 4-
c.vChromaSubSample : 4;
3263 uint64_t *
const yHistogram=
c.yHistogram;
3264 uint8_t *
const tempSrc= srcStride > 0 ?
c.tempSrc :
c.tempSrc - 23*srcStride;
3265 uint8_t *
const tempDst= (dstStride > 0 ?
c.tempDst :
c.tempDst - 23*dstStride) + 32;
3269 for(i=0; i<57; i++){
3270 int offset= ((i*
c.ppMode.baseDcDiff)>>8) + 1;
3271 int threshold= offset*2 + 1;
3273 c.mmxDcThreshold[
i]= 0x7F - threshold;
3274 c.mmxDcOffset[
i]*= 0x0101010101010101LL;
3275 c.mmxDcThreshold[
i]*= 0x0101010101010101LL;
3289 else if(mode &
DERING) copyAhead=9;
3297 uint64_t maxClipped;
3303 if(
c.frameNum == 1) yHistogram[0]=
width*(uint64_t)
height/64*15/256;
3305 for(i=0; i<256; i++){
3306 sum+= yHistogram[
i];
3310 maxClipped= (uint64_t)(sum *
c.ppMode.maxClippedThreshold);
3313 for(black=255; black>0; black--){
3314 if(clipped < maxClipped)
break;
3315 clipped-= yHistogram[black];
3319 for(white=0; white<256; white++){
3320 if(clipped < maxClipped)
break;
3321 clipped-= yHistogram[white];
3324 scale= (double)(
c.ppMode.maxAllowedY -
c.ppMode.minAllowedY) / (double)(white-black);
3326 #if TEMPLATE_PP_MMXEXT 3327 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
3328 c.packedYOffset= (((black*
c.packedYScale)>>8) -
c.ppMode.minAllowedY) & 0xFFFF;
3330 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
3331 c.packedYOffset= (black -
c.ppMode.minAllowedY) & 0xFFFF;
3334 c.packedYOffset|=
c.packedYOffset<<32;
3335 c.packedYOffset|=
c.packedYOffset<<16;
3337 c.packedYScale|=
c.packedYScale<<32;
3338 c.packedYScale|=
c.packedYScale<<16;
3340 if(mode &
LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
3341 else QPCorrecture= 256*256;
3343 c.packedYScale= 0x0100010001000100LL;
3345 QPCorrecture= 256*256;
3351 const uint8_t *srcBlock= &(
src[y*srcStride]);
3352 uint8_t *dstBlock= tempDst + dstStride;
3359 #if TEMPLATE_PP_MMXEXT 3368 "mov %4, %%"REG_a
" \n\t" 3369 "shr $2, %%"REG_a
" \n\t" 3370 "and $6, %%"REG_a
" \n\t" 3371 "add %5, %%"REG_a
" \n\t" 3372 "mov %%"REG_a
", %%"REG_d
" \n\t" 3373 "imul %1, %%"REG_a
" \n\t" 3374 "imul %3, %%"REG_d
" \n\t" 3375 "prefetchnta 32(%%"REG_a
", %0) \n\t" 3376 "prefetcht0 32(%%"REG_d
", %2) \n\t" 3377 "add %1, %%"REG_a
" \n\t" 3378 "add %3, %%"REG_d
" \n\t" 3379 "prefetchnta 32(%%"REG_a
", %0) \n\t" 3380 "prefetcht0 32(%%"REG_d
", %2) \n\t" 3381 ::
"r" (srcBlock),
"r" ((
x86_reg)srcStride),
"r" (dstBlock),
"r" ((
x86_reg)dstStride),
3383 :
"%"REG_a,
"%"REG_d
3386 #elif TEMPLATE_PP_3DNOW 3396 srcBlock + srcStride*8, srcStride, mode &
LEVEL_FIX, &
c.packedYOffset);
3402 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3404 else if(mode & MEDIAN_DEINT_FILTER)
3406 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3410 else if(mode & LOWPASS5_DEINT_FILTER)
3418 if(width==
FFABS(dstStride))
3419 linecpy(
dst, tempDst + 9*dstStride, copyAhead, dstStride);
3422 for(i=0; i<copyAhead; i++){
3423 memcpy(
dst + i*dstStride, tempDst + (9+i)*dstStride, width);
3430 const uint8_t *srcBlock= &(
src[y*srcStride]);
3434 uint8_t *tempBlock2=
c.tempBlocks + 8;
3436 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
3437 int8_t *nonBQPptr= &
c.nonBQPTable[(y>>qpVShift)*
FFABS(QPStride)];
3445 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
3446 FFMAX(height-y-copyAhead, 0), srcStride);
3449 for(i=
FFMAX(height-y, 8); i<copyAhead+8; i++)
3450 memcpy(tempSrc + srcStride*i,
src + srcStride*(height-1),
FFABS(srcStride));
3453 linecpy(tempDst, dstBlock - dstStride,
FFMIN(height-y+1, copyAhead+1), dstStride);
3456 for(i=height-y+1; i<=copyAhead; i++)
3457 memcpy(tempDst + dstStride*i,
dst + dstStride*(height-1),
FFABS(dstStride));
3459 dstBlock= tempDst + dstStride;
3467 const int stride= dstStride;
3472 QP= QPptr[x>>qpHShift];
3473 c.nonBQP= nonBQPptr[x>>qpHShift];
3476 QP= (QP* QPCorrecture + 256*128)>>16;
3477 c.nonBQP= nonBQPptr[x>>4];
3478 c.nonBQP= (
c.nonBQP* QPCorrecture + 256*128)>>16;
3479 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
3484 "movd %1, %%mm7 \n\t" 3485 "packuswb %%mm7, %%mm7 \n\t" 3486 "packuswb %%mm7, %%mm7 \n\t" 3487 "packuswb %%mm7, %%mm7 \n\t" 3488 "movq %%mm7, %0 \n\t" 3495 #if TEMPLATE_PP_MMXEXT 3504 "mov %4, %%"REG_a
" \n\t" 3505 "shr $2, %%"REG_a
" \n\t" 3506 "and $6, %%"REG_a
" \n\t" 3507 "add %5, %%"REG_a
" \n\t" 3508 "mov %%"REG_a
", %%"REG_d
" \n\t" 3509 "imul %1, %%"REG_a
" \n\t" 3510 "imul %3, %%"REG_d
" \n\t" 3511 "prefetchnta 32(%%"REG_a
", %0) \n\t" 3512 "prefetcht0 32(%%"REG_d
", %2) \n\t" 3513 "add %1, %%"REG_a
" \n\t" 3514 "add %3, %%"REG_d
" \n\t" 3515 "prefetchnta 32(%%"REG_a
", %0) \n\t" 3516 "prefetcht0 32(%%"REG_d
", %2) \n\t" 3517 ::
"r" (srcBlock),
"r" ((
x86_reg)srcStride),
"r" (dstBlock),
"r" ((
x86_reg)dstStride),
3519 :
"%"REG_a,
"%"REG_d
3522 #elif TEMPLATE_PP_3DNOW 3532 srcBlock + srcStride*copyAhead, srcStride, mode &
LEVEL_FIX, &
c.packedYOffset);
3536 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3538 else if(mode & MEDIAN_DEINT_FILTER)
3540 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3544 else if(mode & LOWPASS5_DEINT_FILTER)
3552 if(mode & V_X1_FILTER)
3554 else if(mode & V_DEBLOCK){
3567 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
3576 const int t=
RENAME(vertClassify)(tempBlock1, 16, &
c);
3583 RENAME(do_a_deblock)(tempBlock1, 16, 1, &
c);
3586 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
3589 if(mode & H_X1_FILTER)
3591 else if(mode & H_DEBLOCK){
3592 #if TEMPLATE_PP_ALTIVEC 3614 }
else if(mode & H_A_DEBLOCK){
3617 #endif //TEMPLATE_PP_MMX 3626 c.tempBlurred[isColor] + y*dstStride +
x,
3627 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3628 c.ppMode.maxTmpNoise);
3636 tmpXchg= tempBlock1;
3637 tempBlock1= tempBlock2;
3638 tempBlock2 = tmpXchg;
3643 if(y > 0)
RENAME(
dering)(dstBlock - dstStride - 8, dstStride, &
c);
3648 c.tempBlurred[isColor] + y*dstStride +
x,
3649 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3650 c.ppMode.maxTmpNoise);
3656 if(width==
FFABS(dstStride))
3657 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
3660 for(i=0; i<height-
y; i++){
3661 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
3675 #if TEMPLATE_PP_3DNOW 3676 __asm__
volatile(
"femms");
3677 #elif TEMPLATE_PP_MMX 3678 __asm__
volatile(
"emms");
3681 #ifdef DEBUG_BRIGHTNESS 3685 for(i=0; i<256; i++)
3686 if(yHistogram[i] > max) max=yHistogram[
i];
3688 for(i=1; i<256; i++){
3690 int start=yHistogram[i-1]/(max/256+1);
3691 int end=yHistogram[
i]/(max/256+1);
3692 int inc= end > start ? 1 : -1;
3693 for(x=start; x!=end+inc; x+=inc)
3694 dst[ i*dstStride + x]+=128;
3697 for(i=0; i<100; i+=2){
3698 dst[ (white)*dstStride + i]+=128;
3699 dst[ (black)*dstStride + i]+=128;
3709 #undef TEMPLATE_PP_C 3710 #undef TEMPLATE_PP_ALTIVEC 3711 #undef TEMPLATE_PP_MMX 3712 #undef TEMPLATE_PP_MMXEXT 3713 #undef TEMPLATE_PP_3DNOW 3714 #undef TEMPLATE_PP_SSE2
static void RENAME() deInterlaceL5(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
Deinterlace the given block by filtering every line with a (-1 2 6 2 -1) filter.
static int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
FIXME Range Coding of cr are ref
if max(w)>1 w=0.9 *w/max(w)
#define DECLARE_ALIGNED(n, t, v)
static void RENAME() duplicate(uint8_t src[], int stride)
Duplicate the given 8 src pixels ? times upward.
static void RENAME() doVertLowPass(uint8_t *src, int stride, PPContext *c)
Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) using the...
static void RENAME() deInterlaceFF(uint8_t src[], int stride, uint8_t *tmp)
Deinterlace the given block by filtering every second line with a (-1 4 2 4 -1) filter.
static void transpose_16x8_char_toPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define LINEAR_BLEND_DEINT_FILTER
static void transpose_8x16_char_fromPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
static void RENAME() deInterlaceBlendLinear(uint8_t src[], int stride, uint8_t *tmp)
Deinterlace the given block by filtering all lines with a (1 2 1) filter.
#define LOWPASS5_DEINT_FILTER
static void horizX1Filter(uint8_t *src, int stride, int QP)
Experimental Filter 1 (Horizontal) will not damage linear gradients Flat blocks should look like they...
static void RENAME() deInterlaceMedian(uint8_t src[], int stride)
Deinterlace the given block by applying a median filter to every second line.
static void linecpy(void *dest, const void *src, int lines, int stride)
#define MEDIAN_DEINT_FILTER
#define CUBIC_IPOL_DEINT_FILTER
static void RENAME() dering(uint8_t src[], int stride, PPContext *c)
void av_log(void *avcl, int level, const char *fmt,...)
static const uint8_t offset[127][2]
static void RENAME() deInterlaceInterpolateLinear(uint8_t src[], int stride)
Deinterlace the given block by linearly interpolating every second line.
#define LINEAR_IPOL_DEINT_FILTER
static void RENAME() blockCopy(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, int levelFix, int64_t *packedOffsetAndScale)
Copy a block from src to dst and fixes the blacklevel.
#define XMM_CLOBBERS(...)
#define FFMPEG_DEINT_FILTER
static void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c)
#define diff(a, as, b, bs)
static void RENAME() deInterlaceInterpolateCubic(uint8_t src[], int stride)
Deinterlace the given block by cubic interpolating every second line.
static void RENAME() postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c)
Filter array of bytes (Y or U or V values)
static void RENAME() vertX1Filter(uint8_t *src, int stride, PPContext *co)
Experimental Filter 1 will not damage linear gradients Flat blocks should look like they were passed ...
BYTE int const BYTE int int int height
synthesis window for stochastic i
static void RENAME() doVertDefFilter(uint8_t src[], int stride, PPContext *c)
static void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
static void RENAME() tempNoiseReducer(uint8_t *src, int stride, uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
#define TEMP_NOISE_FILTER
else dst[i][x+y *dst_stride[i]]
#define LEVEL_FIX
Brightness & Contrast.
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step