61                               int dstStride, 
int src1Stride, 
int h);
    64                                      int src1Stride, 
int h);
    66                               int dstStride, 
int src1Stride, 
int h);
    68                                int dstStride, 
int src1Stride, 
int h);
    70                                int dstStride, 
int src1Stride, 
int h);
    72                                       int dstStride, 
int src1Stride, 
int h);
    74                            ptrdiff_t line_size, 
int h);
    76 static void ff_put_pixels16_mmxext(
uint8_t *block, 
const uint8_t *pixels,
    77                                    ptrdiff_t line_size, 
int h)
    84                                          int dstStride, 
int srcStride, 
int h);
    86                                          int dstStride, 
int srcStride, 
int h);
    88                                                  int dstStride, 
int srcStride,
    91                                         int dstStride, 
int srcStride, 
int h);
    93                                         int dstStride, 
int srcStride, 
int h);
    95                                                 int dstStride, 
int srcStride,
    98                                          int dstStride, 
int srcStride);
   100                                          int dstStride, 
int srcStride);
   102                                                  int dstStride, 
int srcStride);
   104                                         int dstStride, 
int srcStride);
   106                                         int dstStride, 
int srcStride);
   108                                                 int dstStride, 
int srcStride);
   109 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext   110 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext   116 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)   117 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)   119 #define MOVQ_BFE(regd)                                  \   121         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \   122         "paddb   %%"#regd", %%"#regd"   \n\t" ::)   125 #define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))   126 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))   130 #define MOVQ_BONE(regd)                                 \   132         "pcmpeqd  %%"#regd", %%"#regd"  \n\t"           \   133         "psrlw          $15, %%"#regd"  \n\t"           \   134         "packuswb %%"#regd", %%"#regd"  \n\t" ::)   136 #define MOVQ_WTWO(regd)                                 \   138         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \   139         "psrlw         $15, %%"#regd"   \n\t"           \   140         "psllw          $1, %%"#regd"   \n\t"::)   147 #define PAVGB_MMX(rega, regb, regr, regfe)                       \   148     "movq   "#rega", "#regr"            \n\t"                    \   149     "por    "#regb", "#regr"            \n\t"                    \   150     "pxor   "#rega", "#regb"            \n\t"                    \   151     "pand  "#regfe", "#regb"            \n\t"                    \   152     "psrlq       $1, "#regb"            \n\t"                    \   153     "psubb  "#regb", "#regr"            \n\t"   156 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \   157     "movq  "#rega", "#regr"             \n\t"                    \   158     "movq  "#regc", "#regp"             \n\t"                    \   159     "por   "#regb", "#regr"             \n\t"                    \   160     "por   "#regd", "#regp"             \n\t"                    \   161     "pxor  "#rega", "#regb"             \n\t"                    \   162     "pxor  "#regc", "#regd"             \n\t"                    \   163     "pand    %%mm6, "#regb"             \n\t"                    \   164     "pand    %%mm6, "#regd"             \n\t"                    \   165     "psrlq      $1, "#regd"             \n\t"                    \   166     "psrlq      $1, "#regb"             \n\t"                    \   167     "psubb "#regb", "#regr"             \n\t"                    \   168     "psubb "#regd", "#regp"             \n\t"   173 #define DEF(x, y) x ## _ ## y ## _mmx   174 #define SET_RND  MOVQ_WTWO   175 #define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX(a, b, c, d, e, f)   176 #define PAVGB(a, b, c, e)               PAVGB_MMX(a, b, c, e)   177 #define OP_AVG(a, b, c, e)              PAVGB_MMX(a, b, c, e)   196 static void ff_avg_pixels16_mmxext(
uint8_t *block, 
const uint8_t *pixels,
   197                                    int line_size, 
int h)
   221         "movq      (%3), %%mm0          \n\t"   222         "movq     8(%3), %%mm1          \n\t"   223         "movq    16(%3), %%mm2          \n\t"   224         "movq    24(%3), %%mm3          \n\t"   225         "movq    32(%3), %%mm4          \n\t"   226         "movq    40(%3), %%mm5          \n\t"   227         "movq    48(%3), %%mm6          \n\t"   228         "movq    56(%3), %%mm7          \n\t"   229         "packuswb %%mm1, %%mm0          \n\t"   230         "packuswb %%mm3, %%mm2          \n\t"   231         "packuswb %%mm5, %%mm4          \n\t"   232         "packuswb %%mm7, %%mm6          \n\t"   233         "movq     %%mm0, (%0)           \n\t"   234         "movq     %%mm2, (%0, %1)       \n\t"   235         "movq     %%mm4, (%0, %1, 2)    \n\t"   236         "movq     %%mm6, (%0, %2)       \n\t"   237         :: 
"r"(pix), 
"r"((
x86_reg)line_size), 
"r"((
x86_reg)line_size * 3),
   240     pix += line_size * 4;
   247         "movq       (%3), %%mm0         \n\t"   248         "movq      8(%3), %%mm1         \n\t"   249         "movq     16(%3), %%mm2         \n\t"   250         "movq     24(%3), %%mm3         \n\t"   251         "movq     32(%3), %%mm4         \n\t"   252         "movq     40(%3), %%mm5         \n\t"   253         "movq     48(%3), %%mm6         \n\t"   254         "movq     56(%3), %%mm7         \n\t"   255         "packuswb  %%mm1, %%mm0         \n\t"   256         "packuswb  %%mm3, %%mm2         \n\t"   257         "packuswb  %%mm5, %%mm4         \n\t"   258         "packuswb  %%mm7, %%mm6         \n\t"   259         "movq      %%mm0, (%0)          \n\t"   260         "movq      %%mm2, (%0, %1)      \n\t"   261         "movq      %%mm4, (%0, %1, 2)   \n\t"   262         "movq      %%mm6, (%0, %2)      \n\t"   263         :: 
"r"(pix), 
"r"((
x86_reg)line_size), 
"r"((
x86_reg)line_size * 3), 
"r"(p)
   267 #define put_signed_pixels_clamped_mmx_half(off)             \   268     "movq          "#off"(%2), %%mm1        \n\t"           \   269     "movq     16 + "#off"(%2), %%mm2        \n\t"           \   270     "movq     32 + "#off"(%2), %%mm3        \n\t"           \   271     "movq     48 + "#off"(%2), %%mm4        \n\t"           \   272     "packsswb  8 + "#off"(%2), %%mm1        \n\t"           \   273     "packsswb 24 + "#off"(%2), %%mm2        \n\t"           \   274     "packsswb 40 + "#off"(%2), %%mm3        \n\t"           \   275     "packsswb 56 + "#off"(%2), %%mm4        \n\t"           \   276     "paddb              %%mm0, %%mm1        \n\t"           \   277     "paddb              %%mm0, %%mm2        \n\t"           \   278     "paddb              %%mm0, %%mm3        \n\t"           \   279     "paddb              %%mm0, %%mm4        \n\t"           \   280     "movq               %%mm1, (%0)         \n\t"           \   281     "movq               %%mm2, (%0, %3)     \n\t"           \   282     "movq               %%mm3, (%0, %3, 2)  \n\t"           \   283     "movq               %%mm4, (%0, %1)     \n\t"   292         "movq "MANGLE(ff_pb_80)
", %%mm0     \n\t"   293         "lea         (%3, %3, 2), %1        \n\t"   294         put_signed_pixels_clamped_mmx_half(0)
   295         "lea         (%0, %3, 4), %0        \n\t"   296         put_signed_pixels_clamped_mmx_half(64)
   297         : 
"+&r"(pixels), 
"=&r"(line_skip3)
   298         : 
"r"(block), 
"r"(line_skip)
   316             "movq        (%2), %%mm0    \n\t"   317             "movq       8(%2), %%mm1    \n\t"   318             "movq      16(%2), %%mm2    \n\t"   319             "movq      24(%2), %%mm3    \n\t"   320             "movq          %0, %%mm4    \n\t"   321             "movq          %1, %%mm6    \n\t"   322             "movq       %%mm4, %%mm5    \n\t"   323             "punpcklbw  %%mm7, %%mm4    \n\t"   324             "punpckhbw  %%mm7, %%mm5    \n\t"   325             "paddsw     %%mm4, %%mm0    \n\t"   326             "paddsw     %%mm5, %%mm1    \n\t"   327             "movq       %%mm6, %%mm5    \n\t"   328             "punpcklbw  %%mm7, %%mm6    \n\t"   329             "punpckhbw  %%mm7, %%mm5    \n\t"   330             "paddsw     %%mm6, %%mm2    \n\t"   331             "paddsw     %%mm5, %%mm3    \n\t"   332             "packuswb   %%mm1, %%mm0    \n\t"   333             "packuswb   %%mm3, %%mm2    \n\t"   334             "movq       %%mm0, %0       \n\t"   335             "movq       %%mm2, %1       \n\t"   336             : 
"+m"(*pix), 
"+m"(*(pix + line_size))
   339         pix += line_size * 2;
   344 static void put_pixels8_mmx(
uint8_t *block, 
const uint8_t *pixels,
   345                             ptrdiff_t line_size, 
int h)
   348         "lea   (%3, %3), %%"REG_a
"      \n\t"   351         "movq  (%1    ), %%mm0          \n\t"   352         "movq  (%1, %3), %%mm1          \n\t"   353         "movq     %%mm0, (%2)           \n\t"   354         "movq     %%mm1, (%2, %3)       \n\t"   355         "add  %%"REG_a
", %1             \n\t"   356         "add  %%"REG_a
", %2             \n\t"   357         "movq  (%1    ), %%mm0          \n\t"   358         "movq  (%1, %3), %%mm1          \n\t"   359         "movq     %%mm0, (%2)           \n\t"   360         "movq     %%mm1, (%2, %3)       \n\t"   361         "add  %%"REG_a
", %1             \n\t"   362         "add  %%"REG_a
", %2             \n\t"   365         : 
"+g"(h), 
"+r"(pixels),  
"+r"(block)
   371 static void put_pixels16_mmx(
uint8_t *block, 
const uint8_t *pixels,
   372                              ptrdiff_t line_size, 
int h)
   375         "lea   (%3, %3), %%"REG_a
"      \n\t"   378         "movq  (%1    ), %%mm0          \n\t"   379         "movq 8(%1    ), %%mm4          \n\t"   380         "movq  (%1, %3), %%mm1          \n\t"   381         "movq 8(%1, %3), %%mm5          \n\t"   382         "movq     %%mm0,  (%2)          \n\t"   383         "movq     %%mm4, 8(%2)          \n\t"   384         "movq     %%mm1,  (%2, %3)      \n\t"   385         "movq     %%mm5, 8(%2, %3)      \n\t"   386         "add  %%"REG_a
", %1             \n\t"   387         "add  %%"REG_a
", %2             \n\t"   388         "movq  (%1    ), %%mm0          \n\t"   389         "movq 8(%1    ), %%mm4          \n\t"   390         "movq  (%1, %3), %%mm1          \n\t"   391         "movq 8(%1, %3), %%mm5          \n\t"   392         "movq     %%mm0,  (%2)          \n\t"   393         "movq     %%mm4, 8(%2)          \n\t"   394         "movq     %%mm1,  (%2, %3)      \n\t"   395         "movq     %%mm5, 8(%2, %3)      \n\t"   396         "add  %%"REG_a
", %1             \n\t"   397         "add  %%"REG_a
", %2             \n\t"   400         : 
"+g"(h), 
"+r"(pixels),  
"+r"(block)
   406 #define CLEAR_BLOCKS(name, n)                           \   407 static void name(int16_t *blocks)                       \   410         "pxor %%mm7, %%mm7              \n\t"           \   411         "mov     %1,        %%"REG_a"   \n\t"           \   413         "movq %%mm7,   (%0, %%"REG_a")  \n\t"           \   414         "movq %%mm7,  8(%0, %%"REG_a")  \n\t"           \   415         "movq %%mm7, 16(%0, %%"REG_a")  \n\t"           \   416         "movq %%mm7, 24(%0, %%"REG_a")  \n\t"           \   417         "add    $32, %%"REG_a"          \n\t"           \   419         :: "r"(((uint8_t *)blocks) + 128 * n),          \   424 CLEAR_BLOCKS(clear_blocks_mmx, 6)
   425 CLEAR_BLOCKS(clear_block_mmx, 1)
   427 static void clear_block_sse(int16_t *block)
   430         "xorps  %%xmm0, %%xmm0          \n"   431         "movaps %%xmm0,    (%0)         \n"   432         "movaps %%xmm0,  16(%0)         \n"   433         "movaps %%xmm0,  32(%0)         \n"   434         "movaps %%xmm0,  48(%0)         \n"   435         "movaps %%xmm0,  64(%0)         \n"   436         "movaps %%xmm0,  80(%0)         \n"   437         "movaps %%xmm0,  96(%0)         \n"   438         "movaps %%xmm0, 112(%0)         \n"   444 static void clear_blocks_sse(int16_t *blocks)
   447         "xorps  %%xmm0, %%xmm0              \n"   448         "mov        %1,         %%"REG_a
"   \n"   450         "movaps %%xmm0,    (%0, %%"REG_a
")  \n"   451         "movaps %%xmm0,  16(%0, %%"REG_a
")  \n"   452         "movaps %%xmm0,  32(%0, %%"REG_a
")  \n"   453         "movaps %%xmm0,  48(%0, %%"REG_a
")  \n"   454         "movaps %%xmm0,  64(%0, %%"REG_a
")  \n"   455         "movaps %%xmm0,  80(%0, %%"REG_a
")  \n"   456         "movaps %%xmm0,  96(%0, %%"REG_a
")  \n"   457         "movaps %%xmm0, 112(%0, %%"REG_a
")  \n"   458         "add      $128,         %%"REG_a
"   \n"   460         :: 
"r"(((
uint8_t *)blocks) + 128 * 6),
   472         "movq   (%1, %0), %%mm0         \n\t"   473         "movq   (%2, %0), %%mm1         \n\t"   474         "paddb     %%mm0, %%mm1         \n\t"   475         "movq      %%mm1, (%2, %0)      \n\t"   476         "movq  8(%1, %0), %%mm0         \n\t"   477         "movq  8(%2, %0), %%mm1         \n\t"   478         "paddb     %%mm0, %%mm1         \n\t"   479         "movq      %%mm1, 8(%2, %0)     \n\t"   494                                             int *left, 
int *left_top)
   498     int l  = *left     & 0xff;
   499     int tl = *left_top & 0xff;
   504         "movzbl (%3, %4), %2            \n"   517         "add    (%6, %4), %b0           \n"   518         "mov         %b0, (%5, %4)      \n"   521         : 
"+&q"(l), 
"+&q"(tl), 
"=&r"(
t), 
"=&q"(x), 
"+&r"(w2)
   532                            int w, 
int h, 
int sides)
   543             "movd            (%0), %%mm0    \n\t"   544             "punpcklbw      %%mm0, %%mm0    \n\t"   545             "punpcklwd      %%mm0, %%mm0    \n\t"   546             "punpckldq      %%mm0, %%mm0    \n\t"   547             "movq           %%mm0, -8(%0)   \n\t"   548             "movq      -8(%0, %2), %%mm1    \n\t"   549             "punpckhbw      %%mm1, %%mm1    \n\t"   550             "punpckhwd      %%mm1, %%mm1    \n\t"   551             "punpckhdq      %%mm1, %%mm1    \n\t"   552             "movq           %%mm1, (%0, %2) \n\t"   562             "movd            (%0), %%mm0        \n\t"   563             "punpcklbw      %%mm0, %%mm0        \n\t"   564             "punpcklwd      %%mm0, %%mm0        \n\t"   565             "punpckldq      %%mm0, %%mm0        \n\t"   566             "movq           %%mm0, -8(%0)       \n\t"   567             "movq           %%mm0, -16(%0)      \n\t"   568             "movq      -8(%0, %2), %%mm1        \n\t"   569             "punpckhbw      %%mm1, %%mm1        \n\t"   570             "punpckhwd      %%mm1, %%mm1        \n\t"   571             "punpckhdq      %%mm1, %%mm1        \n\t"   572             "movq           %%mm1,  (%0, %2)    \n\t"   573             "movq           %%mm1, 8(%0, %2)    \n\t"   584             "movd            (%0), %%mm0    \n\t"   585             "punpcklbw      %%mm0, %%mm0    \n\t"   586             "punpcklwd      %%mm0, %%mm0    \n\t"   587             "movd           %%mm0, -4(%0)   \n\t"   588             "movd      -4(%0, %2), %%mm1    \n\t"   589             "punpcklbw      %%mm1, %%mm1    \n\t"   590             "punpckhwd      %%mm1, %%mm1    \n\t"   591             "punpckhdq      %%mm1, %%mm1    \n\t"   592             "movd           %%mm1, (%0, %2) \n\t"   603         for (i = 0; i < h; i += 4) {
   607                 "movq (%1, %0), %%mm0           \n\t"   608                 "movq    %%mm0, (%0)            \n\t"   609                 "movq    %%mm0, (%0, %2)        \n\t"   610                 "movq    %%mm0, (%0, %2, 2)     \n\t"   611                 "movq    %%mm0, (%0, %3)        \n\t"   623         for (i = 0; i < h; i += 4) {
   624             ptr = last_line + (i + 1) * 
wrap - 
w;
   627                 "movq (%1, %0), %%mm0           \n\t"   628                 "movq    %%mm0, (%0)            \n\t"   629                 "movq    %%mm0, (%0, %2)        \n\t"   630                 "movq    %%mm0, (%0, %2, 2)     \n\t"   631                 "movq    %%mm0, (%0, %3)        \n\t"   647 #define QPEL_OP(OPNAME, ROUNDER, RND, MMX)                              \   648 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src,   \   651     ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \   654 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src,    \   658     uint8_t * const half = (uint8_t*)temp;                              \   659     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \   661     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \   662                                         stride, stride, 8);             \   665 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src,    \   668     ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \   672 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src,    \   676     uint8_t * const half = (uint8_t*)temp;                              \   677     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \   679     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \   683 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src,    \   687     uint8_t * const half = (uint8_t*)temp;                              \   688     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \   690     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \   691                                         stride, stride, 8);             \   694 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src,    \   697     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \   701 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src,    \   705     uint8_t * const half = (uint8_t*)temp;                              \   706     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \   708     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\   712 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src,    \   715     uint64_t half[8 + 9];                                               \   716     uint8_t * const halfH  = ((uint8_t*)half) + 64;                     \   717     uint8_t * const halfHV = ((uint8_t*)half);                          \   718     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   720     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \   722     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\   723     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \   727 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src,    \   730     uint64_t half[8 + 9];                                               \   731     uint8_t * const halfH  = ((uint8_t*)half) + 64;                     \   732     uint8_t * const halfHV = ((uint8_t*)half);                          \   733     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   735     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \   737     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\   738     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \   742 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src,    \   745     uint64_t half[8 + 9];                                               \   746     uint8_t * const halfH  = ((uint8_t*)half) + 64;                     \   747     uint8_t * const halfHV = ((uint8_t*)half);                          \   748     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   750     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \   752     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\   753     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \   757 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src,    \   760     uint64_t half[8 + 9];                                               \   761     uint8_t * const halfH  = ((uint8_t*)half) + 64;                     \   762     uint8_t * const halfHV = ((uint8_t*)half);                          \   763     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   765     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \   767     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\   768     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \   772 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src,    \   775     uint64_t half[8 + 9];                                               \   776     uint8_t * const halfH  = ((uint8_t*)half) + 64;                     \   777     uint8_t * const halfHV = ((uint8_t*)half);                          \   778     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   780     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\   781     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \   785 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src,    \   788     uint64_t half[8 + 9];                                               \   789     uint8_t * const halfH  = ((uint8_t*)half) + 64;                     \   790     uint8_t * const halfHV = ((uint8_t*)half);                          \   791     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   793     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\   794     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \   798 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src,    \   801     uint64_t half[8 + 9];                                               \   802     uint8_t * const halfH = ((uint8_t*)half);                           \   803     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   805     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \   807     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \   811 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src,    \   814     uint64_t half[8 + 9];                                               \   815     uint8_t * const halfH = ((uint8_t*)half);                           \   816     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   818     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \   820     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \   824 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src,    \   828     uint8_t * const halfH = ((uint8_t*)half);                           \   829     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \   831     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \   835 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src,  \   838     ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \   841 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src,   \   845     uint8_t * const half = (uint8_t*)temp;                              \   846     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \   848     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \   852 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src,   \   855     ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \   856                                                     stride, stride, 16);\   859 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src,   \   863     uint8_t * const half = (uint8_t*)temp;                              \   864     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \   866     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \   867                                          stride, stride, 16);           \   870 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src,   \   874     uint8_t * const half = (uint8_t*)temp;                              \   875     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \   877     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \   881 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src,   \   884     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \   888 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src,   \   892     uint8_t * const half = (uint8_t*)temp;                              \   893     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \   895     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \   896                                          stride, stride, 16);           \   899 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src,   \   902     uint64_t half[16 * 2 + 17 * 2];                                     \   903     uint8_t * const halfH  = ((uint8_t*)half) + 256;                    \   904     uint8_t * const halfHV = ((uint8_t*)half);                          \   905     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   907     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \   909     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \   911     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \   915 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src,   \   918     uint64_t half[16 * 2 + 17 * 2];                                     \   919     uint8_t * const halfH  = ((uint8_t*)half) + 256;                    \   920     uint8_t * const halfHV = ((uint8_t*)half);                          \   921     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   923     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \   925     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \   927     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \   931 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src,   \   934     uint64_t half[16 * 2 + 17 * 2];                                     \   935     uint8_t * const halfH  = ((uint8_t*)half) + 256;                    \   936     uint8_t * const halfHV = ((uint8_t*)half);                          \   937     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   939     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \   941     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \   943     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \   947 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src,   \   950     uint64_t half[16 * 2 + 17 * 2];                                     \   951     uint8_t * const halfH  = ((uint8_t*)half) + 256;                    \   952     uint8_t * const halfHV = ((uint8_t*)half);                          \   953     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   955     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \   957     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \   959     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \   963 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src,   \   966     uint64_t half[16 * 2 + 17 * 2];                                     \   967     uint8_t * const halfH  = ((uint8_t*)half) + 256;                    \   968     uint8_t * const halfHV = ((uint8_t*)half);                          \   969     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   971     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \   973     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \   977 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src,   \   980     uint64_t half[16 * 2 + 17 * 2];                                     \   981     uint8_t * const halfH  = ((uint8_t*)half) + 256;                    \   982     uint8_t * const halfHV = ((uint8_t*)half);                          \   983     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   985     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \   987     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \   991 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src,   \   994     uint64_t half[17 * 2];                                              \   995     uint8_t * const halfH = ((uint8_t*)half);                           \   996     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \   998     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \  1000     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \  1004 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src,   \  1007     uint64_t half[17 * 2];                                              \  1008     uint8_t * const halfH = ((uint8_t*)half);                           \  1009     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \  1011     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \  1013     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \  1017 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src,   \  1020     uint64_t half[17 * 2];                                              \  1021     uint8_t * const halfH = ((uint8_t*)half);                           \  1022     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \  1024     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \  1030 QPEL_OP(put_no_rnd_,   
ff_pw_15, _no_rnd_, mmxext)
  1037   put_pixels8_xy2_mmx(
dst, 
src, stride, 8);
  1041   put_pixels16_xy2_mmx(
dst, 
src, stride, 16);
  1045   avg_pixels8_xy2_mmx(
dst, 
src, stride, 8);
  1049   avg_pixels16_xy2_mmx(
dst, 
src, stride, 16);
  1053                                    ptrdiff_t linesize, 
int block_w, 
int block_h,
  1054                                    int src_x, 
int src_y, 
int w, 
int h);
  1057                                  int stride, 
int h, 
int ox, 
int oy,
  1058                                  int dxx, 
int dxy, 
int dyx, 
int dyy,
  1060                                  emulated_edge_mc_func *emu_edge_fn)
  1063     const int ix   = ox  >> (16 + 
shift);
  1064     const int iy   = oy  >> (16 + 
shift);
  1065     const int oxs  = ox  >> 4;
  1066     const int oys  = oy  >> 4;
  1067     const int dxxs = dxx >> 4;
  1068     const int dxys = dxy >> 4;
  1069     const int dyxs = dyx >> 4;
  1070     const int dyys = dyy >> 4;
  1071     const uint16_t r4[4]   = { 
r, 
r, 
r, r };
  1072     const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
  1073     const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
  1075 #define MAX_STRIDE 4096U  1077     uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
  1080     const int dxw = (dxx - (1 << (16 + 
shift))) * (w - 1);
  1081     const int dyh = (dyy - (1 << (16 + 
shift))) * (h - 1);
  1082     const int dxh = dxy * (h - 1);
  1083     const int dyw = dyx * (w - 1);
  1084     int need_emu =  (unsigned)ix >= 
width  - w ||
  1085                     (
unsigned)iy >= 
height - h;
  1088         ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
  1089          (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + 
shift)
  1091         || (dxx | dxy | dyx | dyy) & 15
  1092         || (need_emu && (h > MAX_H || 
stride > MAX_STRIDE))) {
  1094         ff_gmc_c(
dst, 
src, 
stride, h, ox, oy, dxx, dxy, dyx, dyy,
  1101         emu_edge_fn(edge_buf, 
src, stride, w + 1, h + 1, ix, iy, 
width, 
height);
  1106         "movd         %0, %%mm6         \n\t"  1107         "pxor      %%mm7, %%mm7         \n\t"  1108         "punpcklwd %%mm6, %%mm6         \n\t"  1109         "punpcklwd %%mm6, %%mm6         \n\t"  1113     for (x = 0; x < 
w; x += 4) {
  1114         uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0),
  1115                             oxs - dxys + dxxs * (x + 1),
  1116                             oxs - dxys + dxxs * (x + 2),
  1117                             oxs - dxys + dxxs * (x + 3) };
  1118         uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0),
  1119                             oys - dyys + dyxs * (x + 1),
  1120                             oys - dyys + dyxs * (x + 2),
  1121                             oys - dyys + dyxs * (x + 3) };
  1123         for (y = 0; y < h; y++) {
  1125                 "movq      %0, %%mm4    \n\t"  1126                 "movq      %1, %%mm5    \n\t"  1127                 "paddw     %2, %%mm4    \n\t"  1128                 "paddw     %3, %%mm5    \n\t"  1129                 "movq   %%mm4, %0       \n\t"  1130                 "movq   %%mm5, %1       \n\t"  1131                 "psrlw    $12, %%mm4    \n\t"  1132                 "psrlw    $12, %%mm5    \n\t"  1133                 : 
"+m"(*dx4), 
"+m"(*dy4)
  1134                 : 
"m"(*dxy4), 
"m"(*dyy4)
  1138                 "movq      %%mm6, %%mm2 \n\t"  1139                 "movq      %%mm6, %%mm1 \n\t"  1140                 "psubw     %%mm4, %%mm2 \n\t"  1141                 "psubw     %%mm5, %%mm1 \n\t"  1142                 "movq      %%mm2, %%mm0 \n\t"  1143                 "movq      %%mm4, %%mm3 \n\t"  1144                 "pmullw    %%mm1, %%mm0 \n\t"   1145                 "pmullw    %%mm5, %%mm3 \n\t"   1146                 "pmullw    %%mm5, %%mm2 \n\t"   1147                 "pmullw    %%mm4, %%mm1 \n\t"   1149                 "movd         %4, %%mm5 \n\t"  1150                 "movd         %3, %%mm4 \n\t"  1151                 "punpcklbw %%mm7, %%mm5 \n\t"  1152                 "punpcklbw %%mm7, %%mm4 \n\t"  1153                 "pmullw    %%mm5, %%mm3 \n\t"   1154                 "pmullw    %%mm4, %%mm2 \n\t"   1156                 "movd         %2, %%mm5 \n\t"  1157                 "movd         %1, %%mm4 \n\t"  1158                 "punpcklbw %%mm7, %%mm5 \n\t"  1159                 "punpcklbw %%mm7, %%mm4 \n\t"  1160                 "pmullw    %%mm5, %%mm1 \n\t"   1161                 "pmullw    %%mm4, %%mm0 \n\t"   1162                 "paddw        %5, %%mm1 \n\t"  1163                 "paddw     %%mm3, %%mm2 \n\t"  1164                 "paddw     %%mm1, %%mm0 \n\t"  1165                 "paddw     %%mm2, %%mm0 \n\t"  1167                 "psrlw        %6, %%mm0 \n\t"  1168                 "packuswb  %%mm0, %%mm0 \n\t"  1169                 "movd      %%mm0, %0    \n\t"  1172                 : 
"m"(
src[0]), 
"m"(
src[1]),
  1173                   "m"(
src[stride]), 
"m"(
src[stride + 1]),
  1187                     int stride, 
int h, 
int ox, 
int oy,
  1188                     int dxx, 
int dxy, 
int dyx, 
int dyy,
  1191     gmc(
dst, 
src, 
stride, h, ox, oy, dxx, dxy, dyx, dyy, 
shift, 
r,
  1192         width, height, &ff_emulated_edge_mc_8);
  1196                     int stride, 
int h, 
int ox, 
int oy,
  1197                     int dxx, 
int dxy, 
int dyx, 
int dyy,
  1200     gmc(
dst, 
src, 
stride, h, ox, oy, dxx, dxy, dyx, dyy, 
shift, 
r,
  1201         width, height, &ff_emulated_edge_mc_8);
  1205                     int stride, 
int h, 
int ox, 
int oy,
  1206                     int dxx, 
int dxy, 
int dyx, 
int dyy,
  1209     gmc(
dst, 
src, 
stride, h, ox, oy, dxx, dxy, dyx, dyy, 
shift, 
r,
  1210         width, height, &ff_emulated_edge_mc_8);
  1218     put_pixels8_mmx(
dst, 
src, stride, 8);
  1223     avg_pixels8_mmx(
dst, 
src, stride, 8);
  1228     put_pixels16_mmx(
dst, 
src, stride, 16);
  1233     avg_pixels16_mmx(
dst, 
src, stride, 16);
  1238                                ptrdiff_t 
stride, 
int rnd)
  1243 #if CONFIG_DIRAC_DECODER  1244 #define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\  1245 void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\  1248         ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\  1250         OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\  1252 void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\  1255         ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\  1257         OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\  1259 void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\  1262         ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\  1264         OPNAME ## _pixels16_ ## EXT(dst   , src[0]   , stride, h);\  1265         OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\  1270 DIRAC_PIXOP(
put, 
put, mmx)
  1271 DIRAC_PIXOP(
avg, 
avg, mmx)
  1275 DIRAC_PIXOP(
avg, ff_avg, mmxext)
  1312 static void vector_clipf_sse(
float *
dst, 
const float *
src,
  1317         "movss          %3, %%xmm4      \n\t"  1318         "movss          %4, %%xmm5      \n\t"  1319         "shufps $0, %%xmm4, %%xmm4      \n\t"  1320         "shufps $0, %%xmm5, %%xmm5      \n\t"  1322         "movaps   (%2, %0), %%xmm0      \n\t"   1323         "movaps 16(%2, %0), %%xmm1      \n\t"  1324         "movaps 32(%2, %0), %%xmm2      \n\t"  1325         "movaps 48(%2, %0), %%xmm3      \n\t"  1326         "maxps      %%xmm4, %%xmm0      \n\t"  1327         "maxps      %%xmm4, %%xmm1      \n\t"  1328         "maxps      %%xmm4, %%xmm2      \n\t"  1329         "maxps      %%xmm4, %%xmm3      \n\t"  1330         "minps      %%xmm5, %%xmm0      \n\t"  1331         "minps      %%xmm5, %%xmm1      \n\t"  1332         "minps      %%xmm5, %%xmm2      \n\t"  1333         "minps      %%xmm5, %%xmm3      \n\t"  1334         "movaps     %%xmm0,   (%1, %0)  \n\t"  1335         "movaps     %%xmm1, 16(%1, %0)  \n\t"  1336         "movaps     %%xmm2, 32(%1, %0)  \n\t"  1337         "movaps     %%xmm3, 48(%1, %0)  \n\t"  1357                                                int order, 
int mul);
  1360                                              int order, 
int mul);
  1363                                               int order, 
int mul);
  1366                                         const int16_t *
window, 
unsigned int len);
  1368                                       const int16_t *
window, 
unsigned int len);
  1370                                   const int16_t *
window, 
unsigned int len);
  1372                                 const int16_t *
window, 
unsigned int len);
  1374                                  const int16_t *
window, 
unsigned int len);
  1376                                       const int16_t *
window, 
unsigned int len);
  1383                                           int *left, 
int *left_top);
  1398 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \  1400     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \  1401     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \  1402     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \  1403     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \  1404     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \  1405     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \  1406     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \  1407     c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \  1408     c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \  1409     c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \  1410     c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \  1411     c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \  1412     c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \  1413     c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \  1414     c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \  1415     c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \  1428     if (!high_bit_depth) {
  1434 #if CONFIG_VIDEODSP && (ARCH_X86_32 || !HAVE_YASM)  1466 #if HAVE_MMXEXT_EXTERNAL  1488     if (!high_bit_depth) {
  1500 #if HAVE_INLINE_ASM && CONFIG_VIDEODSP  1511 #if HAVE_SSE2_INLINE  1520 #if HAVE_SSE2_EXTERNAL  1540 #if HAVE_SSSE3_EXTERNAL  1558 #if HAVE_SSE4_EXTERNAL  1567 #if HAVE_7REGS && HAVE_INLINE_ASM 
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#define CONFIG_MPEG_XVMC_DECODER
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)
int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
static int shift(int a, int b)
void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
if max(w)>1 w=0.9 *w/max(w)
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_idct_xvid_sse2(short *block)
#define AV_CPU_FLAG_SSE
SSE functions. 
void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, int mm_flags)
void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w)
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
#define AV_CPU_FLAG_CMOV
supports cmov instruction 
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format. 
void(* clear_block)(int16_t *block)
output residual component w
Macro definitions for various function/variable attributes. 
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext. 
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
#define AV_CPU_FLAG_ATOM
Atom processor, some SSSE3 instructions are slower. 
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, int order)
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT). 
int lowres
low resolution decoding, 1-> 1/2 size, 2->1/4 size 
#define AV_CPU_FLAG_SSE2SLOW
SSE2 supported, but usually not faster. 
void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, int16_t *block)
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
#define AV_CPU_FLAG_SSE42
Nehalem SSE4.2 functions. 
void(* vector_clipf)(float *dst, const float *src, float min, float max, int len)
#define FF_SSE2_IDCT_PERM
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions. 
void(* add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
void(* h263_h_loop_filter)(uint8_t *src, int stride, int qscale)
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, int mm_flags)
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame This method is called when a frame is wanted on an output For an input
void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
void(* clear_blocks)(int16_t *blocks)
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order)
overlapping window(triangular window to avoid too much overlapping) ovidx
void ff_simple_idct_mmx(int16_t *block)
void(* apply_window_int16)(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
Apply symmetric window in 16-bit fixed-point. 
void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block)
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code. 
int32_t(* scalarproduct_and_madd_int16)(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul)
Calculate scalar product of v1 and v2, and v1[i] += v3[i] * mul. 
void(* draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
int idct_algo
IDCT algorithm, see FF_IDCT_* below. 
void(* put_signed_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
void(* add_bytes)(uint8_t *dst, uint8_t *src, int w)
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
void(* put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, int mm_flags)
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
void ff_h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale)
#define diff(a, as, b, bs)
int xvmc_acceleration
XVideo Motion Acceleration. 
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
#define AV_CPU_FLAG_3DNOW
AMD 3DNOW. 
void(* add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
void(* vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
Clip each element in an array of int32_t to a given minimum and maximum value. 
int idct_permutation_type
void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w)
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> add dest -> clip to unsigned 8 bit -> dest. 
main external API structure. 
#define AV_CPU_FLAG_MMX
standard MMX 
#define FF_SIMPLE_IDCT_PERM
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
#define CONFIG_H263_DECODER
BYTE int const BYTE int int int height
synthesis window for stochastic i
void(* bswap_buf)(uint32_t *dst, const uint32_t *src, int w)
void(* gmc)(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
global motion compensation. 
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions. 
void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, int16_t *block)
void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, int16_t *block)
int32_t(* scalarproduct_int16)(const int16_t *v1, const int16_t *v2, int len)
Calculate scalar product of two vectors. 
void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, int16_t *block)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU. 
void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
void(* idct)(int16_t *block)
header for Xvid IDCT functions 
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
static const int shift2[6]
these buffered frames must be flushed immediately if a new input produces new output(Example:frame rate-doubling filter:filter_frame must(1) flush the second copy of the previous frame, if it is still there,(2) push the first copy of the incoming frame,(3) keep the second copy for later.) If the input frame is not enough to produce output
#define CONFIG_H263_ENCODER
int(* add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left)
Core video DSP helper functions. 
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> clip to unsigned 8 bit -> dest. 
void(* h263_v_loop_filter)(uint8_t *src, int stride, int qscale)
int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left)
else dst[i][x+y *dst_stride[i]]
void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left)
#define FF_IDCT_SIMPLEMMX
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions. 
void ff_idct_xvid_mmx(short *block)
int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15)=0
void ff_idct_xvid_mmxext(short *block)
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride)