32 int16_t *block,
int n,
int qscale)
45 qadd = (qscale - 1) | 1;
57 "packssdw %%mm6, %%mm6 \n\t" 58 "packssdw %%mm6, %%mm6 \n\t" 60 "pxor %%mm7, %%mm7 \n\t" 61 "packssdw %%mm5, %%mm5 \n\t" 62 "packssdw %%mm5, %%mm5 \n\t" 63 "psubw %%mm5, %%mm7 \n\t" 64 "pxor %%mm4, %%mm4 \n\t" 67 "movq (%0, %3), %%mm0 \n\t" 68 "movq 8(%0, %3), %%mm1 \n\t" 70 "pmullw %%mm6, %%mm0 \n\t" 71 "pmullw %%mm6, %%mm1 \n\t" 73 "movq (%0, %3), %%mm2 \n\t" 74 "movq 8(%0, %3), %%mm3 \n\t" 76 "pcmpgtw %%mm4, %%mm2 \n\t" 77 "pcmpgtw %%mm4, %%mm3 \n\t" 79 "pxor %%mm2, %%mm0 \n\t" 80 "pxor %%mm3, %%mm1 \n\t" 82 "paddw %%mm7, %%mm0 \n\t" 83 "paddw %%mm7, %%mm1 \n\t" 85 "pxor %%mm0, %%mm2 \n\t" 86 "pxor %%mm1, %%mm3 \n\t" 88 "pcmpeqw %%mm7, %%mm0 \n\t" 89 "pcmpeqw %%mm7, %%mm1 \n\t" 91 "pandn %%mm2, %%mm0 \n\t" 92 "pandn %%mm3, %%mm1 \n\t" 94 "movq %%mm0, (%0, %3) \n\t" 95 "movq %%mm1, 8(%0, %3) \n\t" 99 ::
"r" (block+nCoeffs),
"rm"(qmul),
"rm" (qadd),
"r" (2*(-nCoeffs))
107 int16_t *block,
int n,
int qscale)
112 qadd = (qscale - 1) | 1;
119 "movd %1, %%mm6 \n\t" 120 "packssdw %%mm6, %%mm6 \n\t" 121 "packssdw %%mm6, %%mm6 \n\t" 122 "movd %2, %%mm5 \n\t" 123 "pxor %%mm7, %%mm7 \n\t" 124 "packssdw %%mm5, %%mm5 \n\t" 125 "packssdw %%mm5, %%mm5 \n\t" 126 "psubw %%mm5, %%mm7 \n\t" 127 "pxor %%mm4, %%mm4 \n\t" 130 "movq (%0, %3), %%mm0 \n\t" 131 "movq 8(%0, %3), %%mm1 \n\t" 133 "pmullw %%mm6, %%mm0 \n\t" 134 "pmullw %%mm6, %%mm1 \n\t" 136 "movq (%0, %3), %%mm2 \n\t" 137 "movq 8(%0, %3), %%mm3 \n\t" 139 "pcmpgtw %%mm4, %%mm2 \n\t" 140 "pcmpgtw %%mm4, %%mm3 \n\t" 142 "pxor %%mm2, %%mm0 \n\t" 143 "pxor %%mm3, %%mm1 \n\t" 145 "paddw %%mm7, %%mm0 \n\t" 146 "paddw %%mm7, %%mm1 \n\t" 148 "pxor %%mm0, %%mm2 \n\t" 149 "pxor %%mm1, %%mm3 \n\t" 151 "pcmpeqw %%mm7, %%mm0 \n\t" 152 "pcmpeqw %%mm7, %%mm1 \n\t" 154 "pandn %%mm2, %%mm0 \n\t" 155 "pandn %%mm3, %%mm1 \n\t" 157 "movq %%mm0, (%0, %3) \n\t" 158 "movq %%mm1, 8(%0, %3) \n\t" 162 ::
"r" (block+nCoeffs),
"rm"(qmul),
"rm" (qadd),
"r" (2*(-nCoeffs))
190 int16_t *block,
int n,
int qscale)
193 const uint16_t *quant_matrix;
207 "pcmpeqw %%mm7, %%mm7 \n\t" 208 "psrlw $15, %%mm7 \n\t" 209 "movd %2, %%mm6 \n\t" 210 "packssdw %%mm6, %%mm6 \n\t" 211 "packssdw %%mm6, %%mm6 \n\t" 212 "mov %3, %%"REG_a
" \n\t" 215 "movq (%0, %%"REG_a
"), %%mm0 \n\t" 216 "movq 8(%0, %%"REG_a
"), %%mm1 \n\t" 217 "movq (%1, %%"REG_a
"), %%mm4 \n\t" 218 "movq 8(%1, %%"REG_a
"), %%mm5 \n\t" 219 "pmullw %%mm6, %%mm4 \n\t" 220 "pmullw %%mm6, %%mm5 \n\t" 221 "pxor %%mm2, %%mm2 \n\t" 222 "pxor %%mm3, %%mm3 \n\t" 223 "pcmpgtw %%mm0, %%mm2 \n\t" 224 "pcmpgtw %%mm1, %%mm3 \n\t" 225 "pxor %%mm2, %%mm0 \n\t" 226 "pxor %%mm3, %%mm1 \n\t" 227 "psubw %%mm2, %%mm0 \n\t" 228 "psubw %%mm3, %%mm1 \n\t" 229 "pmullw %%mm4, %%mm0 \n\t" 230 "pmullw %%mm5, %%mm1 \n\t" 231 "pxor %%mm4, %%mm4 \n\t" 232 "pxor %%mm5, %%mm5 \n\t" 233 "pcmpeqw (%0, %%"REG_a
"), %%mm4 \n\t" 234 "pcmpeqw 8(%0, %%"REG_a
"), %%mm5\n\t" 235 "psraw $3, %%mm0 \n\t" 236 "psraw $3, %%mm1 \n\t" 237 "psubw %%mm7, %%mm0 \n\t" 238 "psubw %%mm7, %%mm1 \n\t" 239 "por %%mm7, %%mm0 \n\t" 240 "por %%mm7, %%mm1 \n\t" 241 "pxor %%mm2, %%mm0 \n\t" 242 "pxor %%mm3, %%mm1 \n\t" 243 "psubw %%mm2, %%mm0 \n\t" 244 "psubw %%mm3, %%mm1 \n\t" 245 "pandn %%mm0, %%mm4 \n\t" 246 "pandn %%mm1, %%mm5 \n\t" 247 "movq %%mm4, (%0, %%"REG_a
") \n\t" 248 "movq %%mm5, 8(%0, %%"REG_a
") \n\t" 250 "add $16, %%"REG_a
" \n\t" 252 ::
"r" (block+nCoeffs),
"r"(quant_matrix+nCoeffs),
"rm" (qscale),
"g" (-2*nCoeffs)
259 int16_t *block,
int n,
int qscale)
262 const uint16_t *quant_matrix;
270 "pcmpeqw %%mm7, %%mm7 \n\t" 271 "psrlw $15, %%mm7 \n\t" 272 "movd %2, %%mm6 \n\t" 273 "packssdw %%mm6, %%mm6 \n\t" 274 "packssdw %%mm6, %%mm6 \n\t" 275 "mov %3, %%"REG_a
" \n\t" 278 "movq (%0, %%"REG_a
"), %%mm0 \n\t" 279 "movq 8(%0, %%"REG_a
"), %%mm1 \n\t" 280 "movq (%1, %%"REG_a
"), %%mm4 \n\t" 281 "movq 8(%1, %%"REG_a
"), %%mm5 \n\t" 282 "pmullw %%mm6, %%mm4 \n\t" 283 "pmullw %%mm6, %%mm5 \n\t" 284 "pxor %%mm2, %%mm2 \n\t" 285 "pxor %%mm3, %%mm3 \n\t" 286 "pcmpgtw %%mm0, %%mm2 \n\t" 287 "pcmpgtw %%mm1, %%mm3 \n\t" 288 "pxor %%mm2, %%mm0 \n\t" 289 "pxor %%mm3, %%mm1 \n\t" 290 "psubw %%mm2, %%mm0 \n\t" 291 "psubw %%mm3, %%mm1 \n\t" 292 "paddw %%mm0, %%mm0 \n\t" 293 "paddw %%mm1, %%mm1 \n\t" 294 "paddw %%mm7, %%mm0 \n\t" 295 "paddw %%mm7, %%mm1 \n\t" 296 "pmullw %%mm4, %%mm0 \n\t" 297 "pmullw %%mm5, %%mm1 \n\t" 298 "pxor %%mm4, %%mm4 \n\t" 299 "pxor %%mm5, %%mm5 \n\t" 300 "pcmpeqw (%0, %%"REG_a
"), %%mm4 \n\t" 301 "pcmpeqw 8(%0, %%"REG_a
"), %%mm5\n\t" 302 "psraw $4, %%mm0 \n\t" 303 "psraw $4, %%mm1 \n\t" 304 "psubw %%mm7, %%mm0 \n\t" 305 "psubw %%mm7, %%mm1 \n\t" 306 "por %%mm7, %%mm0 \n\t" 307 "por %%mm7, %%mm1 \n\t" 308 "pxor %%mm2, %%mm0 \n\t" 309 "pxor %%mm3, %%mm1 \n\t" 310 "psubw %%mm2, %%mm0 \n\t" 311 "psubw %%mm3, %%mm1 \n\t" 312 "pandn %%mm0, %%mm4 \n\t" 313 "pandn %%mm1, %%mm5 \n\t" 314 "movq %%mm4, (%0, %%"REG_a
") \n\t" 315 "movq %%mm5, 8(%0, %%"REG_a
") \n\t" 317 "add $16, %%"REG_a
" \n\t" 319 ::
"r" (block+nCoeffs),
"r"(quant_matrix+nCoeffs),
"rm" (qscale),
"g" (-2*nCoeffs)
325 int16_t *block,
int n,
int qscale)
328 const uint16_t *quant_matrix;
342 "pcmpeqw %%mm7, %%mm7 \n\t" 343 "psrlw $15, %%mm7 \n\t" 344 "movd %2, %%mm6 \n\t" 345 "packssdw %%mm6, %%mm6 \n\t" 346 "packssdw %%mm6, %%mm6 \n\t" 347 "mov %3, %%"REG_a
" \n\t" 350 "movq (%0, %%"REG_a
"), %%mm0 \n\t" 351 "movq 8(%0, %%"REG_a
"), %%mm1 \n\t" 352 "movq (%1, %%"REG_a
"), %%mm4 \n\t" 353 "movq 8(%1, %%"REG_a
"), %%mm5 \n\t" 354 "pmullw %%mm6, %%mm4 \n\t" 355 "pmullw %%mm6, %%mm5 \n\t" 356 "pxor %%mm2, %%mm2 \n\t" 357 "pxor %%mm3, %%mm3 \n\t" 358 "pcmpgtw %%mm0, %%mm2 \n\t" 359 "pcmpgtw %%mm1, %%mm3 \n\t" 360 "pxor %%mm2, %%mm0 \n\t" 361 "pxor %%mm3, %%mm1 \n\t" 362 "psubw %%mm2, %%mm0 \n\t" 363 "psubw %%mm3, %%mm1 \n\t" 364 "pmullw %%mm4, %%mm0 \n\t" 365 "pmullw %%mm5, %%mm1 \n\t" 366 "pxor %%mm4, %%mm4 \n\t" 367 "pxor %%mm5, %%mm5 \n\t" 368 "pcmpeqw (%0, %%"REG_a
"), %%mm4 \n\t" 369 "pcmpeqw 8(%0, %%"REG_a
"), %%mm5\n\t" 370 "psraw $3, %%mm0 \n\t" 371 "psraw $3, %%mm1 \n\t" 372 "pxor %%mm2, %%mm0 \n\t" 373 "pxor %%mm3, %%mm1 \n\t" 374 "psubw %%mm2, %%mm0 \n\t" 375 "psubw %%mm3, %%mm1 \n\t" 376 "pandn %%mm0, %%mm4 \n\t" 377 "pandn %%mm1, %%mm5 \n\t" 378 "movq %%mm4, (%0, %%"REG_a
") \n\t" 379 "movq %%mm5, 8(%0, %%"REG_a
") \n\t" 381 "add $16, %%"REG_a
" \n\t" 383 ::
"r" (block+nCoeffs),
"r"(quant_matrix+nCoeffs),
"rm" (qscale),
"g" (-2*nCoeffs)
391 int16_t *block,
int n,
int qscale)
394 const uint16_t *quant_matrix;
403 "pcmpeqw %%mm7, %%mm7 \n\t" 404 "psrlq $48, %%mm7 \n\t" 405 "movd %2, %%mm6 \n\t" 406 "packssdw %%mm6, %%mm6 \n\t" 407 "packssdw %%mm6, %%mm6 \n\t" 408 "mov %3, %%"REG_a
" \n\t" 411 "movq (%0, %%"REG_a
"), %%mm0 \n\t" 412 "movq 8(%0, %%"REG_a
"), %%mm1 \n\t" 413 "movq (%1, %%"REG_a
"), %%mm4 \n\t" 414 "movq 8(%1, %%"REG_a
"), %%mm5 \n\t" 415 "pmullw %%mm6, %%mm4 \n\t" 416 "pmullw %%mm6, %%mm5 \n\t" 417 "pxor %%mm2, %%mm2 \n\t" 418 "pxor %%mm3, %%mm3 \n\t" 419 "pcmpgtw %%mm0, %%mm2 \n\t" 420 "pcmpgtw %%mm1, %%mm3 \n\t" 421 "pxor %%mm2, %%mm0 \n\t" 422 "pxor %%mm3, %%mm1 \n\t" 423 "psubw %%mm2, %%mm0 \n\t" 424 "psubw %%mm3, %%mm1 \n\t" 425 "paddw %%mm0, %%mm0 \n\t" 426 "paddw %%mm1, %%mm1 \n\t" 427 "pmullw %%mm4, %%mm0 \n\t" 428 "pmullw %%mm5, %%mm1 \n\t" 429 "paddw %%mm4, %%mm0 \n\t" 430 "paddw %%mm5, %%mm1 \n\t" 431 "pxor %%mm4, %%mm4 \n\t" 432 "pxor %%mm5, %%mm5 \n\t" 433 "pcmpeqw (%0, %%"REG_a
"), %%mm4 \n\t" 434 "pcmpeqw 8(%0, %%"REG_a
"), %%mm5\n\t" 435 "psrlw $4, %%mm0 \n\t" 436 "psrlw $4, %%mm1 \n\t" 437 "pxor %%mm2, %%mm0 \n\t" 438 "pxor %%mm3, %%mm1 \n\t" 439 "psubw %%mm2, %%mm0 \n\t" 440 "psubw %%mm3, %%mm1 \n\t" 441 "pandn %%mm0, %%mm4 \n\t" 442 "pandn %%mm1, %%mm5 \n\t" 443 "pxor %%mm4, %%mm7 \n\t" 444 "pxor %%mm5, %%mm7 \n\t" 445 "movq %%mm4, (%0, %%"REG_a
") \n\t" 446 "movq %%mm5, 8(%0, %%"REG_a
") \n\t" 448 "add $16, %%"REG_a
" \n\t" 450 "movd 124(%0, %3), %%mm0 \n\t" 451 "movq %%mm7, %%mm6 \n\t" 452 "psrlq $32, %%mm7 \n\t" 453 "pxor %%mm6, %%mm7 \n\t" 454 "movq %%mm7, %%mm6 \n\t" 455 "psrlq $16, %%mm7 \n\t" 456 "pxor %%mm6, %%mm7 \n\t" 457 "pslld $31, %%mm7 \n\t" 458 "psrlq $15, %%mm7 \n\t" 459 "pxor %%mm7, %%mm0 \n\t" 460 "movd %%mm0, 124(%0, %3) \n\t" 462 ::
"r" (block+nCoeffs),
"r"(quant_matrix+nCoeffs),
"rm" (qscale),
"r" (-2*nCoeffs)
475 "pxor %%mm7, %%mm7 \n\t" 477 "pxor %%mm0, %%mm0 \n\t" 478 "pxor %%mm1, %%mm1 \n\t" 479 "movq (%0), %%mm2 \n\t" 480 "movq 8(%0), %%mm3 \n\t" 481 "pcmpgtw %%mm2, %%mm0 \n\t" 482 "pcmpgtw %%mm3, %%mm1 \n\t" 483 "pxor %%mm0, %%mm2 \n\t" 484 "pxor %%mm1, %%mm3 \n\t" 485 "psubw %%mm0, %%mm2 \n\t" 486 "psubw %%mm1, %%mm3 \n\t" 487 "movq %%mm2, %%mm4 \n\t" 488 "movq %%mm3, %%mm5 \n\t" 489 "psubusw (%2), %%mm2 \n\t" 490 "psubusw 8(%2), %%mm3 \n\t" 491 "pxor %%mm0, %%mm2 \n\t" 492 "pxor %%mm1, %%mm3 \n\t" 493 "psubw %%mm0, %%mm2 \n\t" 494 "psubw %%mm1, %%mm3 \n\t" 495 "movq %%mm2, (%0) \n\t" 496 "movq %%mm3, 8(%0) \n\t" 497 "movq %%mm4, %%mm2 \n\t" 498 "movq %%mm5, %%mm3 \n\t" 499 "punpcklwd %%mm7, %%mm4 \n\t" 500 "punpckhwd %%mm7, %%mm2 \n\t" 501 "punpcklwd %%mm7, %%mm5 \n\t" 502 "punpckhwd %%mm7, %%mm3 \n\t" 503 "paddd (%1), %%mm4 \n\t" 504 "paddd 8(%1), %%mm2 \n\t" 505 "paddd 16(%1), %%mm5 \n\t" 506 "paddd 24(%1), %%mm3 \n\t" 507 "movq %%mm4, (%1) \n\t" 508 "movq %%mm2, 8(%1) \n\t" 509 "movq %%mm5, 16(%1) \n\t" 510 "movq %%mm3, 24(%1) \n\t" 516 :
"+r" (block),
"+r" (sum),
"+r" (
offset)
529 "pxor %%xmm7, %%xmm7 \n\t" 531 "pxor %%xmm0, %%xmm0 \n\t" 532 "pxor %%xmm1, %%xmm1 \n\t" 533 "movdqa (%0), %%xmm2 \n\t" 534 "movdqa 16(%0), %%xmm3 \n\t" 535 "pcmpgtw %%xmm2, %%xmm0 \n\t" 536 "pcmpgtw %%xmm3, %%xmm1 \n\t" 537 "pxor %%xmm0, %%xmm2 \n\t" 538 "pxor %%xmm1, %%xmm3 \n\t" 539 "psubw %%xmm0, %%xmm2 \n\t" 540 "psubw %%xmm1, %%xmm3 \n\t" 541 "movdqa %%xmm2, %%xmm4 \n\t" 542 "movdqa %%xmm3, %%xmm5 \n\t" 543 "psubusw (%2), %%xmm2 \n\t" 544 "psubusw 16(%2), %%xmm3 \n\t" 545 "pxor %%xmm0, %%xmm2 \n\t" 546 "pxor %%xmm1, %%xmm3 \n\t" 547 "psubw %%xmm0, %%xmm2 \n\t" 548 "psubw %%xmm1, %%xmm3 \n\t" 549 "movdqa %%xmm2, (%0) \n\t" 550 "movdqa %%xmm3, 16(%0) \n\t" 551 "movdqa %%xmm4, %%xmm6 \n\t" 552 "movdqa %%xmm5, %%xmm0 \n\t" 553 "punpcklwd %%xmm7, %%xmm4 \n\t" 554 "punpckhwd %%xmm7, %%xmm6 \n\t" 555 "punpcklwd %%xmm7, %%xmm5 \n\t" 556 "punpckhwd %%xmm7, %%xmm0 \n\t" 557 "paddd (%1), %%xmm4 \n\t" 558 "paddd 16(%1), %%xmm6 \n\t" 559 "paddd 32(%1), %%xmm5 \n\t" 560 "paddd 48(%1), %%xmm0 \n\t" 561 "movdqa %%xmm4, (%1) \n\t" 562 "movdqa %%xmm6, 16(%1) \n\t" 563 "movdqa %%xmm5, 32(%1) \n\t" 564 "movdqa %%xmm0, 48(%1) \n\t" 570 :
"+r" (block),
"+r" (sum),
"+r" (
offset)
573 "%xmm4",
"%xmm5",
"%xmm6",
"%xmm7")
av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
void(* dct_unquantize_h263_inter)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
int h263_aic
Advanced INTRA Coding (AIC)
Macro definitions for various function/variable attributes.
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
uint16_t(* dct_offset)[64]
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
static const uint8_t offset[127][2]
void(* dct_unquantize_mpeg1_intra)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
int block_last_index[12]
last non zero coefficient in block
FIXME Range Coding of cr are level
void(* dct_unquantize_mpeg2_inter)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
uint16_t inter_matrix[64]
#define AV_CPU_FLAG_MMX
standard MMX
ScanTable intra_scantable
void(* dct_unquantize_mpeg2_intra)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
void(* dct_unquantize_h263_intra)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void(* denoise_dct)(struct MpegEncContext *s, int16_t *block)
#define XMM_CLOBBERS_ONLY(...)
int flags
AVCodecContext.flags (HQ, MV4, ...)
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
uint16_t intra_matrix[64]
matrix transmitted in the bitstream
ScanTable inter_scantable
if inter == intra then intra should be used to reduce the cache usage
void(* dct_unquantize_mpeg1_inter)(struct MpegEncContext *s, int16_t *block, int n, int qscale)