#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)

/* Butterfly on two packed-word MMX registers:
 *   a' = a + b;  b' = b - a   (b' is formed as 2*b - (a+b), 3 instructions). */
#define SUMSUB_BA( a, b ) \
    "paddw "#b", "#a" \n\t"\
    "paddw "#b", "#b" \n\t"\
    "psubw "#a", "#b" \n\t"

/* One 1-D pass of the CAVS 8x8 inverse transform over four 16-bit
 * coefficients per row (one MMX register per row).  On exit the eight
 * transformed rows are left in mm0..mm7 for the caller to round/store.
 * 'bias' is a packed-word rounding constant added to the even part after
 * the <<3 scaling of rows 0/4.
 * NOTE(review): the asm-statement opening between the signature and the
 * first instruction is not visible in this chunk. */
static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
    /* Odd part: load rows 7, 1, 5, 3 (byte offsets 112, 16, 80, 48)
     * and keep working copies in mm0/mm3/mm6/mm1. */
    "movq 112(%0), %%mm4 \n\t"
    "movq  16(%0), %%mm5 \n\t"
    "movq  80(%0), %%mm2 \n\t"
    "movq  48(%0), %%mm7 \n\t"
    "movq  %%mm4, %%mm0 \n\t"
    "movq  %%mm5, %%mm3 \n\t"
    "movq  %%mm2, %%mm6 \n\t"
    "movq  %%mm7, %%mm1 \n\t"
    /* The odd-part multiplies by small constants are built from
     * doublings (paddw reg,reg) and adds/subs instead of pmullw. */
    "paddw %%mm4, %%mm4 \n\t"
    "paddw %%mm3, %%mm3 \n\t"
    "paddw %%mm6, %%mm6 \n\t"
    "paddw %%mm1, %%mm1 \n\t"
    "paddw %%mm4, %%mm0 \n\t"
    "paddw %%mm3, %%mm5 \n\t"
    "paddw %%mm6, %%mm2 \n\t"
    "paddw %%mm1, %%mm7 \n\t"
    "psubw %%mm4, %%mm5 \n\t"
    "paddw %%mm6, %%mm7 \n\t"
    "psubw %%mm2, %%mm1 \n\t"
    "paddw %%mm0, %%mm3 \n\t"
    /* Save copies, then combine into the four odd outputs. */
    "movq  %%mm5, %%mm4 \n\t"
    "movq  %%mm7, %%mm6 \n\t"
    "movq  %%mm3, %%mm0 \n\t"
    "movq  %%mm1, %%mm2 \n\t"
    SUMSUB_BA( %%mm7, %%mm5 )
    "paddw %%mm3, %%mm7 \n\t"
    "paddw %%mm1, %%mm5 \n\t"
    "paddw %%mm7, %%mm7 \n\t"
    "paddw %%mm5, %%mm5 \n\t"
    "paddw %%mm6, %%mm7 \n\t"
    "paddw %%mm4, %%mm5 \n\t"
    SUMSUB_BA( %%mm1, %%mm3 )
    "psubw %%mm1, %%mm4 \n\t"
    "movq  %%mm4, %%mm1 \n\t"
    "psubw %%mm6, %%mm3 \n\t"
    "paddw %%mm1, %%mm1 \n\t"
    "paddw %%mm3, %%mm3 \n\t"
    "psubw %%mm2, %%mm1 \n\t"
    "paddw %%mm0, %%mm3 \n\t"
    /* Even part, rows 2 and 6 (offsets 32, 96): weights built with
     * psllw $2 plus adds (again avoiding pmullw). */
    "movq 32(%0), %%mm2 \n\t"
    "movq 96(%0), %%mm6 \n\t"
    "movq %%mm2, %%mm4 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "psllw $2, %%mm4 \n\t"
    "psllw $2, %%mm6 \n\t"
    "paddw %%mm4, %%mm2 \n\t"
    "paddw %%mm6, %%mm0 \n\t"
    "paddw %%mm2, %%mm2 \n\t"
    "paddw %%mm0, %%mm0 \n\t"
    "psubw %%mm0, %%mm4 \n\t"
    "paddw %%mm2, %%mm6 \n\t"
    /* Even part, rows 0 and 4: butterfly, scale by 8, add rounding bias. */
    "movq   (%0), %%mm2 \n\t"
    "movq 64(%0), %%mm0 \n\t"
    SUMSUB_BA( %%mm0, %%mm2 )
    "psllw $3, %%mm0 \n\t"
    "psllw $3, %%mm2 \n\t"
    "paddw %1, %%mm0 \n\t"
    "paddw %1, %%mm2 \n\t"
    /* Final butterflies merging even and odd halves into mm0..mm7. */
    SUMSUB_BA( %%mm6, %%mm0 )
    SUMSUB_BA( %%mm4, %%mm2 )
    SUMSUB_BA( %%mm7, %%mm6 )
    SUMSUB_BA( %%mm5, %%mm4 )
    SUMSUB_BA( %%mm3, %%mm2 )
    SUMSUB_BA( %%mm1, %%mm0 )
    :: "r"(block), "m"(bias)
    /* First (horizontal) 1-D pass over four rows at a time, with a
     * rounding bias of 4 for the >>3 below.
     * NOTE(review): the enclosing function signature, the loop over i,
     * and the asm-statement boundaries are not visible in this chunk. */
    cavs_idct8_1d(block+4*i, ff_pw_4.a);

    /* First-pass rounding: arithmetic >>3 on all eight rows, then
     * transpose the two 4x4 halves back into the coefficient block. */
    "psraw $3, %%mm7 \n\t"
    "psraw $3, %%mm6 \n\t"
    "psraw $3, %%mm5 \n\t"
    "psraw $3, %%mm4 \n\t"
    "psraw $3, %%mm3 \n\t"
    "psraw $3, %%mm2 \n\t"
    "psraw $3, %%mm1 \n\t"
    "psraw $3, %%mm0 \n\t"
    "movq %%mm7, %0 \n\t"   /* spill mm7: TRANSPOSE4 needs it as scratch */
    TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
    "movq %%mm0,  8(%1) \n\t"
    "movq %%mm6, 24(%1) \n\t"
    "movq %%mm7, 40(%1) \n\t"
    "movq %%mm4, 56(%1) \n\t"
    "movq %0, %%mm7 \n\t"   /* reload the spilled register */
    TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
    "movq %%mm7,   (%1) \n\t"
    "movq %%mm1, 16(%1) \n\t"
    "movq %%mm0, 32(%1) \n\t"
    "movq %%mm3, 48(%1) \n\t"
    /* Second-pass rounding (>>7) and final stores of all eight rows.
     * NOTE(review): the second cavs_idct8_1d call feeding this is not
     * visible in this chunk. */
    "psraw $7, %%mm7 \n\t"
    "psraw $7, %%mm6 \n\t"
    "psraw $7, %%mm5 \n\t"
    "psraw $7, %%mm4 \n\t"
    "psraw $7, %%mm3 \n\t"
    "psraw $7, %%mm2 \n\t"
    "psraw $7, %%mm1 \n\t"
    "psraw $7, %%mm0 \n\t"
    "movq %%mm7,    (%0) \n\t"
    "movq %%mm5,  16(%0) \n\t"
    "movq %%mm3,  32(%0) \n\t"
    "movq %%mm1,  48(%0) \n\t"
    "movq %%mm0,  64(%0) \n\t"
    "movq %%mm2,  80(%0) \n\t"
    "movq %%mm4,  96(%0) \n\t"
    "movq %%mm6, 112(%0) \n\t"

/* Vertical qpel filter step, variant 1 (quarter-pel position nearer C/D).
 * A..E are five consecutive source rows already widened to words; F is
 * freshly loaded from (%0) and unpacked here for the NEXT rotation step.
 * The small tap weights on B and E are built by doubling/shifting the
 * register and restoring it with psraw afterwards; %5 and MUL2 supply the
 * two pmullw weights, %4 the rounding constant before the >>7.
 * Clobbers mm6/mm7.
 * NOTE(review): the pointer advance of %0 to the next source row (between
 * 'pxor' and 'punpcklbw') is not visible in this chunk. */
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \
    "movd (%0), "#F" \n\t"\
    "movq "#C", %%mm6 \n\t"\
    "pmullw %5, %%mm6 \n\t"\
    "movq "#D", %%mm7 \n\t"\
    "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
    "psllw $3, "#E" \n\t"\
    "psubw "#E", %%mm6 \n\t"\
    "psraw $3, "#E" \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw "#E", %%mm6 \n\t"\
    "paddw "#B", "#B" \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "punpcklbw %%mm7, "#F" \n\t"\
    "psubw "#B", %%mm6 \n\t"\
    "psraw $1, "#B" \n\t"\
    "psubw "#A", %%mm6 \n\t"\
    "paddw %4, %%mm6 \n\t"\
    "psraw $7, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm6 \n\t"\
    OP(%%mm6, (%1), A, d) \

/* Vertical qpel filter step, variant 2 (half-pel, symmetric):
 * computes OP(dst, (%5*(C+D) - B - E + %4) >> 3); with the instantiation
 * below (MUL1 = ff_pw_5, ADD = ff_pw_4) this is (5*(C+D) - B - E + 4)>>3.
 * F is loaded and unpacked for the next rotation step. */
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \
    "movd (%0), "#F" \n\t"\
    "movq "#C", %%mm6 \n\t"\
    "paddw "#D", %%mm6 \n\t"\
    "pmullw %5, %%mm6 \n\t"\
    "punpcklbw %%mm7, "#F" \n\t"\
    "psubw "#B", %%mm6 \n\t"\
    "psubw "#E", %%mm6 \n\t"\
    "paddw %4, %%mm6 \n\t"\
    "psraw $3, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm6 \n\t"\
    OP(%%mm6, (%1), A, d) \

/* Vertical qpel filter step, variant 3: mirror image of QPEL_CAVSV1
 * (same weights applied with the tap order reversed, C/D swapped roles).
 * Continues past this chunk boundary. */
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \
    "movd (%0), "#F" \n\t"\
    "movq "#C", %%mm6 \n\t"\
    "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
    "movq "#D", %%mm7 \n\t"\
    "pmullw %5, %%mm7 \n\t"\
    "psllw $3, "#B" \n\t"\
    "psubw "#B", %%mm6 \n\t"\
    "psraw $3, "#B" \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw "#B", %%mm6 \n\t"\
    "paddw "#E", "#E" \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    /* Tail of QPEL_CAVSV3: widen the freshly loaded F, undo the doubling
     * of E with psraw $1 after subtracting it, round, pack and emit. */
    "punpcklbw %%mm7, "#F" \n\t"\
    "psubw "#E", %%mm6 \n\t"\
    "psraw $1, "#E" \n\t"\
    "psubw "#F", %%mm6 \n\t"\
    "paddw %4, %%mm6 \n\t"\
    "psraw $7, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm6 \n\t"\
    OP(%%mm6, (%1), A, d) \

/* Driver for the vertical filters: primes mm0..mm4 with five source rows,
 * then issues eight VOP steps per 8-row band with the register arguments
 * rotated so each step writes one output row while loading the next input
 * row into the vacated register.  ADD and MUL1 are bound to %4/%5; MUL2 is
 * referenced via MANGLE() inside VOP.  src advances by srcStride (%2) and
 * dst by dstStride (%3); both are rewound at the end for the caller.
 * NOTE(review): the asm-statement open/close, the "add %2, %0" lines
 * between the priming loads, the memory-operand/clobber lines, and the
 * h==16 conditional glue around the second batch of eight VOPs are not
 * visible in this chunk. */
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    "pxor %%mm7, %%mm7 \n\t"\
    "movd (%0), %%mm0 \n\t"\
    "movd (%0), %%mm1 \n\t"\
    "movd (%0), %%mm2 \n\t"\
    "movd (%0), %%mm3 \n\t"\
    "movd (%0), %%mm4 \n\t"\
    "punpcklbw %%mm7, %%mm0 \n\t"\
    "punpcklbw %%mm7, %%mm1 \n\t"\
    "punpcklbw %%mm7, %%mm2 \n\t"\
    "punpcklbw %%mm7, %%mm3 \n\t"\
    "punpcklbw %%mm7, %%mm4 \n\t"\
    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
    VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
    VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
    VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
    VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
    : "+a"(src), "+c"(dst)\
    : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
    VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
    VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
    VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
    VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
    VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
    VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
    : "+a"(src), "+c"(dst)\
    : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
    src += 4-(h+5)*srcStride;\
    dst += 4-h*dstStride;\

/* Generator for one put_/avg_ family of CAVS qpel functions; OP is the
 * store-vs-average primitive, MMX the function-name suffix.  Starts the
 * 8-wide horizontal filter; its body continues on the following lines. */
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride){\
    /* Horizontal 4-tap filter, 8 pixels per row, low/high halves widened
     * separately: result = (5*(src[0]+src[1]) - (src[-1]+src[2]) + 4) >> 3
     * (%5 = ff_pw_5, %6 = ff_pw_4 per the constraints below).
     * NOTE(review): the asm open, the loop label, and the per-row
     * add/decl/jnz glue are not visible in this chunk. */\
    "pxor %%mm7, %%mm7 \n\t"\
    "movq %5, %%mm6 \n\t"\
    "movq (%0), %%mm0 \n\t"\
    "movq 1(%0), %%mm2 \n\t"\
    "movq %%mm0, %%mm1 \n\t"\
    "movq %%mm2, %%mm3 \n\t"\
    "punpcklbw %%mm7, %%mm0 \n\t"\
    "punpckhbw %%mm7, %%mm1 \n\t"\
    "punpcklbw %%mm7, %%mm2 \n\t"\
    "punpckhbw %%mm7, %%mm3 \n\t"\
    "paddw %%mm2, %%mm0 \n\t"\
    "paddw %%mm3, %%mm1 \n\t"\
    "pmullw %%mm6, %%mm0 \n\t"\
    "pmullw %%mm6, %%mm1 \n\t"\
    "movq -1(%0), %%mm2 \n\t"\
    "movq 2(%0), %%mm4 \n\t"\
    "movq %%mm2, %%mm3 \n\t"\
    "movq %%mm4, %%mm5 \n\t"\
    "punpcklbw %%mm7, %%mm2 \n\t"\
    "punpckhbw %%mm7, %%mm3 \n\t"\
    "punpcklbw %%mm7, %%mm4 \n\t"\
    "punpckhbw %%mm7, %%mm5 \n\t"\
    "paddw %%mm4, %%mm2 \n\t"\
    "paddw %%mm3, %%mm5 \n\t"\
    "psubw %%mm2, %%mm0 \n\t"\
    "psubw %%mm5, %%mm1 \n\t"\
    "movq %6, %%mm5 \n\t"\
    "paddw %%mm5, %%mm0 \n\t"\
    "paddw %%mm5, %%mm1 \n\t"\
    "psraw $3, %%mm0 \n\t"\
    "psraw $3, %%mm1 \n\t"\
    "packuswb %%mm1, %%mm0 \n\t"\
    OP(%%mm0, (%1),%%mm5, q) \
    : "+a"(src), "+c"(dst), "+m"(h)\
    : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\

/* Vertical wrappers: bind the three filter variants to their constants.
 * v1/v3 (quarter-pel): ADD=64, MUL1=96, MUL2=42; v2 (half-pel): ADD=4,
 * MUL1=MUL2=5. */\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
/* 8-high vertical variant-1: single 8-row call. */\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t
*dst, uint8_t *src, int dstStride, int srcStride){\
    /* 16-wide variants: two side-by-side 8-wide columns, 16 rows each. */\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
/* 16x16 horizontal: four 8-wide calls (upper pair, then lower pair).
 * NOTE(review): the dst/src advance to the lower half between the two
 * pairs is not visible in this chunk. */\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\

/* Generator for the exported entry points: maps the fixed quarter-pel
 * positions mc20/mc01/mc02/mc03 onto the h/v1/v2/v3 kernels. */
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ##
_mc02_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\

/* Store primitive: plain mov of the computed pixels. */
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/* Average primitive for 3DNow!: load dst, pavgusb, store back. */
#define AVG_3DNOW_OP(a,b,temp, size) \
    "mov" #size " " #b ", " #temp " \n\t"\
    "pavgusb " #temp ", " #a " \n\t"\
    "mov" #size " " #a ", " #b " \n\t"
/* Average primitive for MMXEXT: same pattern with pavgb. */
#define AVG_MMXEXT_OP(a, b, temp, size) \
    "mov" #size " " #b ", " #temp " \n\t"\
    "pavgb " #temp ", " #a " \n\t"\
    "mov" #size " " #a ", " #b " \n\t"

#if HAVE_MMXEXT_INLINE
/* Instantiate the put_ family with the MMXEXT store/average ops. */
QPEL_CAVS(put_, PUT_OP, mmxext)
/* Instantiate the avg_ family using pavgb for the destination average. */
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
/* Fill one row of the qpel function table for the MMXEXT build.
 * Second index is the quarter-pel position; only 0 (copy), 2 (h half-pel)
 * and 4/8/12 (the three vertical variants) are provided here.
 * NOTE(review): IDX presumably selects the block size — confirm against
 * the CAVSDSPContext definition, which is outside this view. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmxext; \
    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmxext; \

/* Hook up the MMX idct.  NOTE(review): this assignment appears to sit
 * inside an init function whose header/#endif are outside this view. */
    c->cavs_idct8_add = cavs_idct8_add_mmx;

#if HAVE_AMD3DNOW_INLINE
/* 3DNow! instantiations: same kernels, pavgusb-based averaging. */
QPEL_CAVS(put_, PUT_OP, 3dnow)
QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)

/* Table wiring for the 3DNow! build.
 * NOTE(review): entry [0] (mc00) points at the _mmxext variant while all
 * other entries use _3dnow — confirm against upstream that this is
 * intentional and not a copy/paste slip. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \

/* Same idct hookup for the 3DNow! init path. */
    c->cavs_idct8_add = cavs_idct8_add_mmx;
506 #if HAVE_MMXEXT_INLINE 509 #if HAVE_AMD3DNOW_INLINE #define TRANSPOSE4(a, b, c, d, t)
#define DECLARE_ALIGNED(n, t, v)
Macro definitions for various function/variable attributes.
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
#define CAVS_MC(OPNAME, SIZE)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
#define AV_CPU_FLAG_3DNOW
AMD 3DNOW.
#define dspfunc(PFX, IDX, NUM)
main external API structure.
synthesis window for stochastic i
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
common internal and external API header
else dst[i][x+y *dst_stride[i]]
#define FF_TRANSPOSE_IDCT_PERM