#define OP_PUT(S,D)
#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"

/** Add rounder from %%mm7 to %%mm3 and pack result at destination */
#define NORMALIZE_MMX(SHIFT)                     \
     "paddw     %%mm7, %%mm3           \n\t" /* +bias-r */ \
     "paddw     %%mm7, %%mm4           \n\t" /* +bias-r */ \
     "psraw     "SHIFT", %%mm3         \n\t"     \
     "psraw     "SHIFT", %%mm4         \n\t"

#define TRANSFER_DO_PACK(OP)                     \
     "packuswb  %%mm4, %%mm3           \n\t"     \
     OP((%2), %%mm3)                             \
     "movq      %%mm3, (%2)            \n\t"

#define TRANSFER_DONT_PACK(OP)                   \
     OP(0(%2), %%mm3)                            \
     OP(8(%2), %%mm4)                            \
     "movq      %%mm3, 0(%2)           \n\t"     \
     "movq      %%mm4, 8(%2)           \n\t"

/** @see MSPEL_FILTER13_CORE for use as UNPACK macro */
#define DO_UNPACK(reg)   "punpcklbw %%mm0, " reg "\n\t"
#define DONT_UNPACK(reg)

/** Compute the rounder 32-r or 8-r and unpack it to %%mm7 */
#define LOAD_ROUNDER_MMX(ROUND)                  \
     "movd      "ROUND", %%mm7         \n\t"     \
     "punpcklwd %%mm7, %%mm7           \n\t"     \
     "punpckldq %%mm7, %%mm7           \n\t"

#define SHIFT2_LINE(OFF, R0,R1,R2,R3)            \
    "paddw     %%mm"#R2", %%mm"#R1"    \n\t"     \
    "movd      (%0,%3), %%mm"#R0"      \n\t"     \
    "pmullw    %%mm6, %%mm"#R1"        \n\t"     \
    "punpcklbw %%mm0, %%mm"#R0"        \n\t"     \
    "movd      (%0,%2), %%mm"#R3"      \n\t"     \
    "psubw     %%mm"#R0", %%mm"#R1"    \n\t"     \
    "punpcklbw %%mm0, %%mm"#R3"        \n\t"     \
    "paddw     %%mm7, %%mm"#R1"        \n\t"     \
    "psubw     %%mm"#R3", %%mm"#R1"    \n\t"     \
    "psraw     %4, %%mm"#R1"           \n\t"     \
    "movq      %%mm"#R1", "#OFF"(%1)   \n\t"     \
    "add       %2, %0                  \n\t"

/** Sacrificing mm6 makes it possible to pipeline loads from src */
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
                                       const uint8_t *src, x86_reg stride,
                                       int rnd, int64_t shift)
{
    __asm__ volatile(
        "mov       $3, %%"REG_c"           \n\t"
        LOAD_ROUNDER_MMX("%5")
        "movq      "MANGLE(ff_pw_9)", %%mm6 \n\t"
        "1:                                \n\t"
        "movd      (%0), %%mm2             \n\t"
        "add       %2, %0                  \n\t"
        "movd      (%0), %%mm3             \n\t"
        "punpcklbw %%mm0, %%mm2            \n\t"
        "punpcklbw %%mm0, %%mm3            \n\t"
        SHIFT2_LINE(  0, 1, 2, 3, 4)
        SHIFT2_LINE( 24, 2, 3, 4, 1)
        SHIFT2_LINE( 48, 3, 4, 1, 2)
        SHIFT2_LINE( 72, 4, 1, 2, 3)
        SHIFT2_LINE( 96, 1, 2, 3, 4)
        SHIFT2_LINE(120, 2, 3, 4, 1)
        SHIFT2_LINE(144, 3, 4, 1, 2)
        SHIFT2_LINE(168, 4, 1, 2, 3)
        "sub       %6, %0                  \n\t"
        "add       $8, %1                  \n\t"
        "dec       %%"REG_c"               \n\t"
        "jnz 1b                            \n\t"
        : "+r"(src), "+r"(dst)
        : "r"(stride), "r"(-2*stride),
          "m"(shift), "m"(rnd), "r"(9*stride-4)
        : "%"REG_c, "memory"
    );
}
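/*
 * Illustrative sketch, not part of the original file: plain-C equivalent of
 * vc1_put_ver_16b_shift2_mmx above, assuming the (-1, 9, 9, -1) half-pel
 * taps and the 12-int16_t row pitch implied by the 24-byte store offsets.
 * The function name and the 8x12 geometry are assumptions for illustration.
 */
static void vc1_ver_16b_shift2_c_sketch(int16_t *dst, const uint8_t *src,
                                        int stride, int rnd, int shift)
{
    int x, y;
    for (y = 0; y < 8; y++)          /* 8 output rows */
        for (x = 0; x < 12; x++)     /* 8 pixels plus margin for the 2nd pass */
            dst[y*12 + x] = (-    src[(y-1)*stride + x]
                             + 9*src[ y   *stride + x]
                             + 9*src[(y+1)*stride + x]
                             -    src[(y+2)*stride + x] + rnd) >> shift;
}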
/**
 * Data is already unpacked, so some operations can directly be made from
 * memory.
 */
#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\
static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
                                             const int16_t *src, int rnd)\
{\
    int h = 8;\
\
    src -= 1;\
    rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */\
    __asm__ volatile(\
        LOAD_ROUNDER_MMX("%4")\
        "movq      "MANGLE(ff_pw_128)", %%mm6\n\t"\
        "movq      "MANGLE(ff_pw_9)", %%mm5 \n\t"\
        "1:                                \n\t"\
        "movq      2*0+0(%1), %%mm1        \n\t"\
        "movq      2*0+8(%1), %%mm2        \n\t"\
        "movq      2*1+0(%1), %%mm3        \n\t"\
        "movq      2*1+8(%1), %%mm4        \n\t"\
        "paddw     2*3+0(%1), %%mm1        \n\t"\
        "paddw     2*3+8(%1), %%mm2        \n\t"\
        "paddw     2*2+0(%1), %%mm3        \n\t"\
        "paddw     2*2+8(%1), %%mm4        \n\t"\
        "pmullw    %%mm5, %%mm3            \n\t"\
        "pmullw    %%mm5, %%mm4            \n\t"\
        "psubw     %%mm1, %%mm3            \n\t"\
        "psubw     %%mm2, %%mm4            \n\t"\
        NORMALIZE_MMX("$7")\
        /* Remove bias */\
        "paddw     %%mm6, %%mm3            \n\t"\
        "paddw     %%mm6, %%mm4            \n\t"\
        TRANSFER_DO_PACK(OP)\
        "add       $24, %1                 \n\t"\
        "add       %3, %2                  \n\t"\
        "decl      %0                      \n\t"\
        "jnz 1b                            \n\t"\
        : "+r"(h), "+r" (src), "+r" (dst)\
        : "r"(stride), "m"(rnd)\
        : "memory"\
    );\
}

VC1_HOR_16b_SHIFT2(OP_PUT, put_)
VC1_HOR_16b_SHIFT2(OP_AVG, avg_)
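/*
 * Illustrative note and sketch, not part of the original file: in the macro
 * above, the (-1+9+9-1)*1024 subtracted from rnd and the ff_pw_128 added
 * back after the ">>7" cancel exactly (16*1024 == 128<<7); the detour keeps
 * the signed word intermediates in a range that packuswb clips correctly.
 * The net per-pixel effect, using av_clip_uint8() from libavutil and an
 * assumed function name:
 */
static void vc1_hor_16b_shift2_c_sketch(uint8_t *dst, const int16_t *src,
                                        int rnd)
{
    int x;
    for (x = 0; x < 8; x++)  /* src points one sample into the 16-bit row */
        dst[x] = av_clip_uint8((9*(src[x] + src[x+1])
                                - src[x-1] - src[x+2] + rnd) >> 7);
}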
/**
 * Purely vertical or horizontal 1/2 shift interpolation.
 * Sacrifice mm6 for *9 factor.
 */
#define VC1_SHIFT2(OP, OPNAME)\
static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
                                     x86_reg stride, int rnd, x86_reg offset)\
{\
    rnd = 8-rnd;\
    __asm__ volatile(\
        "mov       $8, %%"REG_c"           \n\t"\
        LOAD_ROUNDER_MMX("%5")\
        "movq      "MANGLE(ff_pw_9)", %%mm6\n\t"\
        "1:                                \n\t"\
        "movd      0(%0   ), %%mm3         \n\t"\
        "movd      4(%0   ), %%mm4         \n\t"\
        "movd      0(%0,%2), %%mm1         \n\t"\
        "movd      4(%0,%2), %%mm2         \n\t"\
        "add       %2, %0                  \n\t"\
        "punpcklbw %%mm0, %%mm3            \n\t"\
        "punpcklbw %%mm0, %%mm4            \n\t"\
        "punpcklbw %%mm0, %%mm1            \n\t"\
        "punpcklbw %%mm0, %%mm2            \n\t"\
        "paddw     %%mm1, %%mm3            \n\t"\
        "paddw     %%mm2, %%mm4            \n\t"\
        "movd      0(%0,%3), %%mm1         \n\t"\
        "movd      4(%0,%3), %%mm2         \n\t"\
        "pmullw    %%mm6, %%mm3            \n\t" /* 0,9,9,0 */\
        "pmullw    %%mm6, %%mm4            \n\t" /* 0,9,9,0 */\
        "punpcklbw %%mm0, %%mm1            \n\t"\
        "punpcklbw %%mm0, %%mm2            \n\t"\
        "psubw     %%mm1, %%mm3            \n\t" /* -1,9,9,0 */\
        "psubw     %%mm2, %%mm4            \n\t" /* -1,9,9,0 */\
        "movd      0(%0,%2), %%mm1         \n\t"\
        "movd      4(%0,%2), %%mm2         \n\t"\
        "punpcklbw %%mm0, %%mm1            \n\t"\
        "punpcklbw %%mm0, %%mm2            \n\t"\
        "psubw     %%mm1, %%mm3            \n\t" /* -1,9,9,-1 */\
        "psubw     %%mm2, %%mm4            \n\t" /* -1,9,9,-1 */\
        NORMALIZE_MMX("$4")\
        "packuswb  %%mm4, %%mm3            \n\t"\
        OP((%1), %%mm3)\
        "movq      %%mm3, (%1)             \n\t"\
        "add       %6, %0                  \n\t"\
        "add       %4, %1                  \n\t"\
        "dec       %%"REG_c"               \n\t"\
        "jnz 1b                            \n\t"\
        : "+r"(src), "+r"(dst)\
        : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
          "g"(stride-offset)\
        : "%"REG_c, "memory"\
    );\
}

VC1_SHIFT2(OP_PUT, put_)
VC1_SHIFT2(OP_AVG, avg_)
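/*
 * Illustrative sketch, not part of the original file: scalar form of one
 * 8-pixel row of VC1_SHIFT2 above, assuming the (-1, 9, 9, -1) taps, the
 * ">>4" normalization and the 8-r rounder it uses. offset is stride for the
 * purely vertical 1/2 shift and 1 for the purely horizontal one; the
 * function name is an assumption.
 */
static void vc1_shift2_row_c_sketch(uint8_t *dst, const uint8_t *src,
                                    int offset, int rnd /* already 8-r */)
{
    int i;
    for (i = 0; i < 8; i++)
        dst[i] = av_clip_uint8((9*(src[i] + src[i+offset])
                                - src[i-offset] - src[i+2*offset]
                                + rnd) >> 4);
}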
/**
 * Core of the 1/4 and 3/4 shift bicubic interpolation.
 *
 * @param UNPACK  Macro unpacking arguments from 8 to 16 bits (can be empty).
 * @param MOVQ    "movd 1" or "movq 2", if data read is already unpacked.
 * @param A1      Address of 1st tap (beware of unpacked/packed).
 * @param A2      Address of 2nd tap
 * @param A3      Address of 3rd tap
 * @param A4      Address of 4th tap
 */
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)       \
     MOVQ "*0+"A1", %%mm1       \n\t"                           \
     MOVQ "*4+"A1", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "pmullw    "MANGLE(ff_pw_3)", %%mm1\n\t"                   \
     "pmullw    "MANGLE(ff_pw_3)", %%mm2\n\t"                   \
     MOVQ "*0+"A2", %%mm3       \n\t"                           \
     MOVQ "*4+"A2", %%mm4       \n\t"                           \
     UNPACK("%%mm3")                                            \
     UNPACK("%%mm4")                                            \
     "pmullw    %%mm6, %%mm3    \n\t" /* *18 */                 \
     "pmullw    %%mm6, %%mm4    \n\t" /* *18 */                 \
     "psubw     %%mm1, %%mm3    \n\t" /* 18,-3 */               \
     "psubw     %%mm2, %%mm4    \n\t" /* 18,-3 */               \
     MOVQ "*0+"A4", %%mm1       \n\t"                           \
     MOVQ "*4+"A4", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "psllw     $2, %%mm1       \n\t" /* 4* */                  \
     "psllw     $2, %%mm2       \n\t" /* 4* */                  \
     "psubw     %%mm1, %%mm3    \n\t" /* -4,18,-3 */            \
     "psubw     %%mm2, %%mm4    \n\t" /* -4,18,-3 */            \
     MOVQ "*0+"A3", %%mm1       \n\t"                           \
     MOVQ "*4+"A3", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "pmullw    %%mm5, %%mm1    \n\t" /* *53 */                 \
     "pmullw    %%mm5, %%mm2    \n\t" /* *53 */                 \
     "paddw     %%mm1, %%mm3    \n\t" /* -4,53,18,-3 */         \
     "paddw     %%mm2, %%mm4    \n\t" /* -4,53,18,-3 */

/**
 * Macro to build the vertical 16 bits version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 must use
 * %3 (src_stride) and %4 (3*src_stride).
 *
 * @param  NAME   Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)                    \
static void                                                             \
vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src,      \
                                 x86_reg src_stride,                    \
                                 int rnd, int64_t shift)                \
{                                                                       \
    int h = 8;                                                          \
    src -= src_stride;                                                  \
    __asm__ volatile(                                                   \
        LOAD_ROUNDER_MMX("%5")                                          \
        "movq      "MANGLE(ff_pw_53)", %%mm5\n\t"                       \
        "movq      "MANGLE(ff_pw_18)", %%mm6\n\t"                       \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4)        \
        NORMALIZE_MMX("%6")                                             \
        TRANSFER_DONT_PACK(OP_PUT)                                      \
        /* Last 3 (in fact 4) bytes on the line */                      \
        "movd      8+"A1", %%mm1   \n\t"                                \
        DO_UNPACK("%%mm1")                                              \
        "movq      %%mm1, %%mm3    \n\t"                                \
        "paddw     %%mm1, %%mm1    \n\t"                                \
        "paddw     %%mm3, %%mm1    \n\t" /* 3* */                       \
        "movd      8+"A2", %%mm3   \n\t"                                \
        DO_UNPACK("%%mm3")                                              \
        "pmullw    %%mm6, %%mm3    \n\t" /* *18 */                      \
        "psubw     %%mm1, %%mm3    \n\t" /* 18,-3 */                    \
        "movd      8+"A3", %%mm1   \n\t"                                \
        DO_UNPACK("%%mm1")                                              \
        "pmullw    %%mm5, %%mm1    \n\t" /* *53 */                      \
        "paddw     %%mm1, %%mm3    \n\t" /* 53,18,-3 */                 \
        "movd      8+"A4", %%mm1   \n\t"                                \
        DO_UNPACK("%%mm1")                                              \
        "psllw     $2, %%mm1       \n\t" /* 4* */                       \
        "psubw     %%mm1, %%mm3    \n\t" /* -4,53,18,-3 */              \
        "paddw     %%mm7, %%mm3    \n\t" /* +bias-r */                  \
        "psraw     %6, %%mm3       \n\t"                                \
        "movq      %%mm3, 16(%2)   \n\t"                                \
        "add       %3, %1          \n\t"                                \
        "add       $24, %2         \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src), "+r" (dst)                               \
        : "r"(src_stride), "r"(3*src_stride),                           \
          "m"(rnd), "m"(shift)                                          \
        : "memory"                                                      \
    );                                                                  \
}

/**
 * Macro to build the horizontal 16 bits version of vc1_put_shift[13].
 * Here, offset=16 bits, so parameters passed A1 to A4 should be simple.
 *
 * @param  NAME   Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)        \
static void                                                             \
OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,    \
                                       const int16_t *src, int rnd)     \
{                                                                       \
    int h = 8;                                                          \
    src -= 1;                                                           \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */                        \
    __asm__ volatile(                                                   \
        LOAD_ROUNDER_MMX("%4")                                          \
        "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                    \
        "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                    \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4)      \
        NORMALIZE_MMX("$7")                                             \
        /* Remove bias */                                               \
        "paddw     "MANGLE(ff_pw_128)", %%mm3  \n\t"                    \
        "paddw     "MANGLE(ff_pw_128)", %%mm4  \n\t"                    \
        TRANSFER_DO_PACK(OP)                                            \
        "add       $24, %1         \n\t"                                \
        "add       %3, %2          \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src), "+r" (dst)                               \
        : "r"(stride), "m"(rnd)                                         \
        : "memory"                                                      \
    );                                                                  \
}

/**
 * Macro to build the 8 bits, any direction, version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 must use
 * %3 (offset) and %4 (3*offset).
 *
 * @param  NAME   Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)             \
static void                                                             \
OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,         \
                              x86_reg stride, int rnd, x86_reg offset)  \
{                                                                       \
    int h = 8;                                                          \
    src -= offset;                                                      \
    rnd = 32-rnd;                                                       \
    __asm__ volatile (                                                  \
        LOAD_ROUNDER_MMX("%6")                                          \
        "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                    \
        "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                    \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4)        \
        NORMALIZE_MMX("$6")                                             \
        TRANSFER_DO_PACK(OP)                                            \
        "add       %5, %1          \n\t"                                \
        "add       %5, %2          \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src), "+r" (dst)                               \
        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)             \
        : "memory"                                                      \
    );                                                                  \
}

/** 1/4 shift bicubic interpolation */
MSPEL_FILTER13_8B     (shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )", OP_PUT, put_)
MSPEL_FILTER13_8B     (shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )", OP_AVG, avg_)
MSPEL_FILTER13_VER_16B(shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )")
MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)", OP_PUT, put_)
MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)", OP_AVG, avg_)
/** 3/4 shift bicubic interpolation */
MSPEL_FILTER13_8B     (shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )", OP_PUT, put_)
MSPEL_FILTER13_8B     (shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )", OP_AVG, avg_)
MSPEL_FILTER13_VER_16B(shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )")
MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)", OP_PUT, put_)
MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)", OP_AVG, avg_)
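/*
 * Illustrative sketch, not part of the original file: with mm5 = ff_pw_53,
 * mm6 = ff_pw_18 and ff_pw_3, MSPEL_FILTER13_CORE computes
 * 18*A2 - 3*A1 - 4*A4 + 53*A3, i.e. the VC-1 bicubic quarter-pel taps
 * (-4, 53, 18, -3)/64, read forwards for shift1 and mirrored for shift3.
 * One pixel of the 8-bit shift1 path (">>6" normalization, 32-r rounder)
 * in scalar form, with an assumed function name; s points one line or
 * sample before the block because the macro rewinds src by offset:
 */
static int vc1_shift1_pixel_c_sketch(const uint8_t *s, int o,
                                     int rnd /* already 32-r */)
{
    return av_clip_uint8((-4*s[0] + 53*s[o] + 18*s[2*o] - 3*s[3*o]
                          + rnd) >> 6);
}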
typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst,
                                               const uint8_t *src,
                                               x86_reg src_stride,
                                               int rnd, int64_t shift);
typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst,
                                               x86_reg dst_stride,
                                               const int16_t *src, int rnd);
typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src,
                                          x86_reg stride, int rnd,
                                          x86_reg offset);
/**
 * Interpolate fractional pel values by applying proper vertical then
 * horizontal filter.
 *
 * @param  dst     Destination buffer for interpolated pels.
 * @param  src     Source buffer.
 * @param  stride  Stride for both src and dst buffers.
 * @param  hmode   Horizontal filter (expressed in quarter pixels shift).
 * @param  vmode   Vertical filter.
 * @param  rnd     Rounding bias.
 */
#define VC1_MSPEL_MC(OP)\
static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
                               int hmode, int vmode, int rnd)\
{\
    static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
         { NULL, vc1_put_ver_16b_shift1_mmx, vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
    static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
         { NULL, OP ## vc1_hor_16b_shift1_mmx, OP ## vc1_hor_16b_shift2_mmx, OP ## vc1_hor_16b_shift3_mmx };\
    static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\
         { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\
\
    __asm__ volatile(\
        "pxor %%mm0, %%mm0         \n\t"\
    );\
\
    if (vmode) { /* Vertical filter to apply */\
        if (hmode) { /* Horizontal filter to apply, output to tmp */\
            static const int shift_value[] = { 0, 5, 1, 5 };\
            int    shift = (shift_value[hmode]+shift_value[vmode])>>1;\
            int    r;\
            DECLARE_ALIGNED(16, int16_t, tmp)[12*8];\
\
            r = (1<<(shift-1)) + rnd-1;\
            vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\
\
            vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);\
            return;\
        }\
        else { /* No horizontal filter, output 8 lines to dst */\
            vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);\
            return;\
        }\
    }\
\
    /* Horizontal mode with no vertical mode */\
    vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\
}

VC1_MSPEL_MC(put_)
VC1_MSPEL_MC(avg_)

/** Macro to ease bicubic filter interpolation functions declarations */
#define DECLARE_FUNCTION(a, b)                                          \
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst,            \
                                               const uint8_t *src,      \
                                               int stride,              \
                                               int rnd)                 \
{                                                                       \
     put_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
}                                                                       \
static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst,         \
                                                  const uint8_t *src,   \
                                                  int stride,           \
                                                  int rnd)              \
{                                                                       \
     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
}

DECLARE_FUNCTION(0, 1)
DECLARE_FUNCTION(0, 2)
DECLARE_FUNCTION(0, 3)

DECLARE_FUNCTION(1, 0)
DECLARE_FUNCTION(1, 1)
DECLARE_FUNCTION(1, 2)
DECLARE_FUNCTION(1, 3)

DECLARE_FUNCTION(2, 0)
DECLARE_FUNCTION(2, 1)
DECLARE_FUNCTION(2, 2)
DECLARE_FUNCTION(2, 3)

DECLARE_FUNCTION(3, 0)
DECLARE_FUNCTION(3, 1)
DECLARE_FUNCTION(3, 2)
DECLARE_FUNCTION(3, 3)
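/*
 * Illustrative worked example, not part of the original file: for the centre
 * half-pel case hmode = vmode = 2 (put_vc1_mspel_mc22_mmx above),
 * shift_value[] gives shift = (1+1)>>1 = 1, so the vertical pass runs with
 * r = (1<<(shift-1)) + rnd-1 = rnd into the 16-bit tmp buffer, and the
 * horizontal pass finishes with rounder 64-rnd and its ">>7" normalization.
 */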
static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;
    __asm__ volatile(
        "movd          %0, %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movd          %0, %%mm2   \n\t"
        "movd          %1, %%mm3   \n\t"
        "movd          %2, %%mm4   \n\t"
        "movd          %3, %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movd       %%mm2, %0      \n\t"
        "movd       %%mm3, %1      \n\t"
        "movd       %%mm4, %2      \n\t"
        "movd       %%mm5, %3      \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
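/*
 * Illustrative note, not part of the original file: after the first asm
 * block, %%mm0 holds the broadcast dc clipped to [0,255] (zero when dc < 0)
 * and %%mm1 likewise holds -dc, so the paddusb/psubusb pair adds a signed dc
 * with unsigned saturation; at most one of the two operations is nonzero.
 * Scalar equivalent per pixel: dest[x] = av_clip_uint8(dest[x] + dc);
 * The same trick is reused by the three DC functions below.
 */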
static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;
    __asm__ volatile(
        "movd          %0, %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movd          %0, %%mm2   \n\t"
        "movd          %1, %%mm3   \n\t"
        "movd          %2, %%mm4   \n\t"
        "movd          %3, %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movd       %%mm2, %0      \n\t"
        "movd       %%mm3, %1      \n\t"
        "movd       %%mm4, %2      \n\t"
        "movd       %%mm5, %3      \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
    dest += 4*linesize;
    __asm__ volatile(
        "movd          %0, %%mm2   \n\t"
        "movd          %1, %%mm3   \n\t"
        "movd          %2, %%mm4   \n\t"
        "movd          %3, %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movd       %%mm2, %0      \n\t"
        "movd       %%mm3, %1      \n\t"
        "movd       %%mm4, %2      \n\t"
        "movd       %%mm5, %3      \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;
    __asm__ volatile(
        "movd          %0, %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movq          %0, %%mm2   \n\t"
        "movq          %1, %%mm3   \n\t"
        "movq          %2, %%mm4   \n\t"
        "movq          %3, %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movq       %%mm2, %0      \n\t"
        "movq       %%mm3, %1      \n\t"
        "movq       %%mm4, %2      \n\t"
        "movq       %%mm5, %3      \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;
    __asm__ volatile(
        "movd          %0, %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movq          %0, %%mm2   \n\t"
        "movq          %1, %%mm3   \n\t"
        "movq          %2, %%mm4   \n\t"
        "movq          %3, %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movq       %%mm2, %0      \n\t"
        "movq       %%mm3, %1      \n\t"
        "movq       %%mm4, %2      \n\t"
        "movq       %%mm5, %3      \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
    dest += 4*linesize;
    __asm__ volatile(
        "movq          %0, %%mm2   \n\t"
        "movq          %1, %%mm3   \n\t"
        "movq          %2, %%mm4   \n\t"
        "movq          %3, %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movq       %%mm2, %0      \n\t"
        "movq       %%mm3, %1      \n\t"
        "movq       %%mm4, %2      \n\t"
        "movq       %%mm5, %3      \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}