#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)

/* in/out: a = a + b, b = b - a  (in terms of the original values) */
#define SUMSUB_BA( a, b ) \
    "paddw "#b", "#a" \n\t"\
    "paddw "#b", "#b" \n\t"\
    "psubw "#a", "#b" \n\t"

static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
{
    __asm__ volatile(
        "movq 112(%0), %%mm4  \n\t" /* mm4 = src7 */
        "movq  16(%0), %%mm5  \n\t" /* mm5 = src1 */
        "movq  80(%0), %%mm2  \n\t" /* mm2 = src5 */
        "movq  48(%0), %%mm7  \n\t" /* mm7 = src3 */
        "movq   %%mm4, %%mm0  \n\t"
        "movq   %%mm5, %%mm3  \n\t"
        "movq   %%mm2, %%mm6  \n\t"
        "movq   %%mm7, %%mm1  \n\t"

        "paddw  %%mm4, %%mm4  \n\t" /* mm4 = 2*src7 */
        "paddw  %%mm3, %%mm3  \n\t" /* mm3 = 2*src1 */
        "paddw  %%mm6, %%mm6  \n\t" /* mm6 = 2*src5 */
        "paddw  %%mm1, %%mm1  \n\t" /* mm1 = 2*src3 */
        "paddw  %%mm4, %%mm0  \n\t" /* mm0 = 3*src7 */
        "paddw  %%mm3, %%mm5  \n\t" /* mm5 = 3*src1 */
        "paddw  %%mm6, %%mm2  \n\t" /* mm2 = 3*src5 */
        "paddw  %%mm1, %%mm7  \n\t" /* mm7 = 3*src3 */
        "psubw  %%mm4, %%mm5  \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
        "paddw  %%mm6, %%mm7  \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
        "psubw  %%mm2, %%mm1  \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
        "paddw  %%mm0, %%mm3  \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */

        "movq   %%mm5, %%mm4  \n\t"
        "movq   %%mm7, %%mm6  \n\t"
        "movq   %%mm3, %%mm0  \n\t"
        "movq   %%mm1, %%mm2  \n\t"
        SUMSUB_BA( %%mm7, %%mm5 )   /* mm7 = a0 + a1   mm5 = a0 - a1 */
        "paddw  %%mm3, %%mm7  \n\t" /* mm7 = a0 + a1 + a3 */
        "paddw  %%mm1, %%mm5  \n\t" /* mm5 = a0 - a1 + a2 */
        "paddw  %%mm7, %%mm7  \n\t" /* mm7 = 2*(a0 + a1 + a3) */
        "paddw  %%mm5, %%mm5  \n\t" /* mm5 = 2*(a0 - a1 + a2) */
        "paddw  %%mm6, %%mm7  \n\t" /* mm7 = b4 */
        "paddw  %%mm4, %%mm5  \n\t" /* mm5 = b5 */

        SUMSUB_BA( %%mm1, %%mm3 )   /* mm1 = a2 + a3   mm3 = a3 - a2 */
        "psubw  %%mm1, %%mm4  \n\t" /* mm4 = a0 - a2 - a3 */
        "movq   %%mm4, %%mm1  \n\t"
        "psubw  %%mm6, %%mm3  \n\t" /* mm3 = a3 - a2 - a1 */
        "paddw  %%mm1, %%mm1  \n\t" /* mm1 = 2*(a0 - a2 - a3) */
        "paddw  %%mm3, %%mm3  \n\t" /* mm3 = 2*(a3 - a2 - a1) */
        "psubw  %%mm2, %%mm1  \n\t" /* mm1 = b7 */
        "paddw  %%mm0, %%mm3  \n\t" /* mm3 = b6 */

        "movq  32(%0), %%mm2  \n\t" /* mm2 = src2 */
        "movq  96(%0), %%mm6  \n\t" /* mm6 = src6 */
        "movq   %%mm2, %%mm4  \n\t"
        "movq   %%mm6, %%mm0  \n\t"
        "psllw  $2,    %%mm4  \n\t" /* mm4 = 4*src2 */
        "psllw  $2,    %%mm6  \n\t" /* mm6 = 4*src6 */
        "paddw  %%mm4, %%mm2  \n\t" /* mm2 = 5*src2 */
        "paddw  %%mm6, %%mm0  \n\t" /* mm0 = 5*src6 */
        "paddw  %%mm2, %%mm2  \n\t" /* mm2 = 10*src2 */
        "paddw  %%mm0, %%mm0  \n\t" /* mm0 = 10*src6 */
        "psubw  %%mm0, %%mm4  \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
        "paddw  %%mm2, %%mm6  \n\t" /* mm6 = 10*src2 + 4*src6 = a6 */

        "movq    (%0), %%mm2  \n\t" /* mm2 = src0 */
        "movq  64(%0), %%mm0  \n\t" /* mm0 = src4 */
        SUMSUB_BA( %%mm0, %%mm2 )   /* mm0 = src0 + src4   mm2 = src0 - src4 */
        "psllw  $3,    %%mm0  \n\t" /* mm0 = 8*(src0 + src4) */
        "psllw  $3,    %%mm2  \n\t" /* mm2 = 8*(src0 - src4) */
        "paddw  %1,    %%mm0  \n\t" /* mm0 = a4 (rounding bias folded in) */
        "paddw  %1,    %%mm2  \n\t" /* mm2 = a5 (rounding bias folded in) */

        SUMSUB_BA( %%mm6, %%mm0 )   /* mm6 = a4 + a6   mm0 = a4 - a6 */
        SUMSUB_BA( %%mm4, %%mm2 )   /* mm4 = a5 + a7   mm2 = a5 - a7 */
        SUMSUB_BA( %%mm7, %%mm6 )   /* mm7 = dst0      mm6 = dst7 */
        SUMSUB_BA( %%mm5, %%mm4 )   /* mm5 = dst1      mm4 = dst6 */
        SUMSUB_BA( %%mm3, %%mm2 )   /* mm3 = dst2      mm2 = dst5 */
        SUMSUB_BA( %%mm1, %%mm0 )   /* mm1 = dst3      mm0 = dst4 */
        :: "r"(block), "m"(bias)
    );
}
        /* transform four columns per call; bias = 4 rounds the >>3 below */
        cavs_idct8_1d(block + 4*i, ff_pw_4.a);
            /* scale the first-pass results by >>3, then transpose and store */
            "psraw     $3, %%mm7  \n\t"
            "psraw     $3, %%mm6  \n\t"
            "psraw     $3, %%mm5  \n\t"
            "psraw     $3, %%mm4  \n\t"
            "psraw     $3, %%mm3  \n\t"
            "psraw     $3, %%mm2  \n\t"
            "psraw     $3, %%mm1  \n\t"
            "psraw     $3, %%mm0  \n\t"
            "movq   %%mm7,    %0   \n\t" /* spill mm7: all eight registers are live */
            TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
            "movq   %%mm0,  8(%1)  \n\t"
            "movq   %%mm6, 24(%1)  \n\t"
            "movq   %%mm7, 40(%1)  \n\t"
            "movq   %%mm4, 56(%1)  \n\t"
            "movq    %0,    %%mm7  \n\t" /* reload the spilled register */
            TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
            "movq   %%mm7,   (%1)  \n\t"
            "movq   %%mm1, 16(%1)  \n\t"
            "movq   %%mm0, 32(%1)  \n\t"
            "movq   %%mm3, 48(%1)  \n\t"

            /* second-pass results: scale by >>7 and store all eight rows */
            "psraw     $7, %%mm7  \n\t"
            "psraw     $7, %%mm6  \n\t"
            "psraw     $7, %%mm5  \n\t"
            "psraw     $7, %%mm4  \n\t"
            "psraw     $7, %%mm3  \n\t"
            "psraw     $7, %%mm2  \n\t"
            "psraw     $7, %%mm1  \n\t"
            "psraw     $7, %%mm0  \n\t"
            "movq   %%mm7,    (%0)  \n\t"
            "movq   %%mm5,  16(%0)  \n\t"
            "movq   %%mm3,  32(%0)  \n\t"
            "movq   %%mm1,  48(%0)  \n\t"
            "movq   %%mm0,  64(%0)  \n\t"
            "movq   %%mm2,  80(%0)  \n\t"
            "movq   %%mm4,  96(%0)  \n\t"
            "movq   %%mm6, 112(%0)  \n\t"

/* Vertical qpel filters: three tap sets, plugged into QPEL_CAVSVNUM below. */

/* quarter-pel: dst = clip((-A - 2*B + 96*C + 42*D - 7*E + 64) >> 7) */
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw %5, %%mm6           \n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
        "psllw $3, "#E"             \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $3, "#E"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#E", %%mm6          \n\t"\
        "paddw "#B", "#B"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $1, "#B"             \n\t"\
        "psubw "#A", %%mm6          \n\t"\
        "paddw %4, %%mm6            \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \

/* half-pel: dst = clip((-B + 5*(C+D) - E + 4) >> 3) */
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "paddw "#D", %%mm6          \n\t"\
        "pmullw %5, %%mm6           \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "paddw %4, %%mm6            \n\t"\
        "psraw $3, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \

/* quarter-pel, mirrored: dst = clip((-7*B + 42*C + 96*D - 2*E - F + 64) >> 7) */
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw %5, %%mm7           \n\t"\
        "psllw $3, "#B"             \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $3, "#B"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#B", %%mm6          \n\t"\
        "paddw "#E", "#E"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $1, "#E"             \n\t"\
        "psubw "#F", %%mm6          \n\t"\
        "paddw %4, %%mm6            \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \

#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
        "pxor %%mm7, %%mm7          \n\t"\
        "movd (%0), %%mm0           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm1           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm2           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm3           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm4           \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpcklbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpcklbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
        \
        : "+a"(src), "+c"(dst)\
        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
        \
            /* second batch of eight output rows, for the 16-pixel-tall cases */\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
           \
           : "+a"(src), "+c"(dst)\
           : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD),  "m"(MUL1)\
     \
     src += 4-(h+5)*srcStride;\
     dst += 4-h*dstStride;\

#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        "pxor %%mm7, %%mm7          \n\t"\
        "movq %5, %%mm6             \n\t"\
        /* per row: dst[x] = (5*(src[x]+src[x+1]) - src[x-1] - src[x+2] + 4) >> 3 */\
        "movq    (%0), %%mm0        \n\t"\
        "movq   1(%0), %%mm2        \n\t"\
        "movq %%mm0, %%mm1          \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpckhbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "paddw %%mm2, %%mm0         \n\t"\
        "paddw %%mm3, %%mm1         \n\t"\
        "pmullw %%mm6, %%mm0        \n\t"\
        "pmullw %%mm6, %%mm1        \n\t"\
        "movq   -1(%0), %%mm2       \n\t"\
        "movq    2(%0), %%mm4       \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "movq %%mm4, %%mm5          \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        "punpckhbw %%mm7, %%mm5     \n\t"\
        "paddw %%mm4, %%mm2         \n\t"\
        "paddw %%mm3, %%mm5         \n\t"\
        "psubw %%mm2, %%mm0         \n\t"\
        "psubw %%mm5, %%mm1         \n\t"\
      "movq %6, %%mm5             \n\t"\   350         "paddw %%mm5, %%mm0         \n\t"\   351         "paddw %%mm5, %%mm1         \n\t"\   352         "psraw $3, %%mm0            \n\t"\   353         "psraw $3, %%mm1            \n\t"\   354         "packuswb %%mm1, %%mm0      \n\t"\   355         OP(%%mm0, (%1),%%mm5, q)         \   360         : "+a"(src), "+c"(dst), "+m"(h)\   361         : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\   366 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\   367   QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42)      \   370 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\   371   QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5)         \   374 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\   375   QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42)      \   378 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   379     OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 8);\   381 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   382     OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\   383     OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\   386 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   387     OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 8);\   389 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   390     OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\   391     OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\   394 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   395     OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 8);\   397 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   398     OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\   399     OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\   402 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\   403     OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\   404     OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\   407     OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\   408     OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\   411 #define CAVS_MC(OPNAME, SIZE, MMX) \   412 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\   414     OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\   417 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\   419     OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\   422 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\   424     OPNAME ## cavs_qpel ## SIZE ## _v2_ 
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}\

/* store the result */
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t"

/* average the result with what is already in dst: (a + b + 1) >> 1 */
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp "   \n\t"\
"pavgusb " #temp ", " #a "        \n\t"\
"mov" #size " " #a ", " #b "      \n\t"
#define AVG_MMXEXT_OP(a, b, temp, size) \
"mov" #size " " #b ", " #temp "   \n\t"\
"pavgb " #temp ", " #a "          \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

#if HAVE_MMXEXT_INLINE
QPEL_CAVS(put_,        PUT_OP, mmxext)
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
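/*
 * For orientation, the filters generated above reduce to the following scalar
 * arithmetic per output pixel (taps read off the asm; src[] is indexed along
 * the filter direction, and the exact alignment of the taps to the output
 * position is glossed over here):
 *
 *   _h_ / _v2_ : (-src[-1] + 5*src[0] + 5*src[1] - src[2] + 4) >> 3
 *   _v1_       : (-src[-2] - 2*src[-1] + 96*src[0] + 42*src[1] - 7*src[2] + 64) >> 7
 *   _v3_       : (-7*src[-1] + 42*src[0] + 96*src[1] - 2*src[2] - src[3] + 64) >> 7
 *
 * each followed by saturation to 0..255 (packuswb); the avg_ variants then
 * average the result with the pixels already in dst.  Below is a plain-C
 * sketch of the 8-wide horizontal case, with an illustrative name that is
 * not part of this file:
 */
static inline void cavs_qpel8_h_c_sketch(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride)
{
    int x, y;
    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++) {
            int v = (-src[x-1] + 5*src[x] + 5*src[x+1] - src[x+2] + 4) >> 3;
            dst[x] = v < 0 ? 0 : v > 255 ? 255 : v;   /* packuswb saturation */
        }
        src += srcStride;
        dst += dstStride;
    }
}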

#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmxext; \
    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmxext; \
    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmxext; \

    c->cavs_idct8_add = cavs_idct8_add_mmx;
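/*
 * Illustration of the token pasting in dspfunc() above: assuming it is invoked
 * as, say, dspfunc(put_cavs_qpel, 0, 16) -- the invocation arguments and the
 * resulting field name are assumptions here -- the [2] entry expands to
 *
 *     c->put_cavs_qpel_pixels_tab[0][2] = ff_put_cavs_qpel16_mc20_mmxext;
 *
 * i.e. one generated function pointer per supported subpel position.
 */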

#if HAVE_AMD3DNOW_INLINE
QPEL_CAVS(put_,       PUT_OP, 3dnow)
QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)

#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; /* plain copy, shared with the mmxext table */ \
    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \

    c->cavs_idct8_add = cavs_idct8_add_mmx;
#if HAVE_MMXEXT_INLINE

#if HAVE_AMD3DNOW_INLINE
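/*
 * The two #if guards above are the last fragments of the runtime init entry
 * point.  Read in context, the dispatcher looks roughly like the sketch
 * below; the per-ISA helper names (cavsdsp_init_mmxext / cavsdsp_init_3dnow)
 * and the plain cpu-flag tests are assumptions, not taken from this excerpt.
 */
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMXEXT_INLINE
    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
        cavsdsp_init_mmxext(c, avctx);  /* install the mmxext tables set up above */
#endif
#if HAVE_AMD3DNOW_INLINE
    if (cpu_flags & AV_CPU_FLAG_3DNOW)
        cavsdsp_init_3dnow(c, avctx);   /* install the 3dnow tables set up above */
#endif
}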