/* Prototype declarations for the single-block IDCT-add routines. Each
 * IDCT_ADD_FUNC(NUM, DEPTH, OPT) expansion names a function
 * ff_h264_idct<NUM>_add_<DEPTH>_<OPT> whose first parameter is uint8_t *dst.
 * NOTE(review): the rest of the macro's parameter list is not visible in
 * this excerpt -- confirm against the full file. */
30 #define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ 31 void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \ 37 IDCT_ADD_FUNC(_dc, 8, mmxext)
38 IDCT_ADD_FUNC(_dc, 10, mmxext)
39 IDCT_ADD_FUNC(8_dc, 8, mmxext)
40 IDCT_ADD_FUNC(8_dc, 10, sse2)
41 IDCT_ADD_FUNC(8, 8, mmx)
42 IDCT_ADD_FUNC(8, 8, sse2)
43 IDCT_ADD_FUNC(8, 10, sse2)
44 IDCT_ADD_FUNC(, 10, avx)
45 IDCT_ADD_FUNC(8_dc, 10, avx)
46 IDCT_ADD_FUNC(8, 10, avx)
/* Prototype declarations for the multi-block IDCT-add routines. Each
 * IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) expansion declares
 *   void ff_h264_idct<NUM>_add<REP>_<DEPTH>_<OPT>(uint8_t *dst,
 *            const int *block_offset, int16_t *block, int stride,
 *            const uint8_t nnzc[6 * 8]);
 * An empty NUM argument yields the plain "idct" name; REP is pasted in
 * verbatim (4, 16 or 16intra below). */
49 #define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ 50 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ 51 (uint8_t *dst, const int *block_offset, \ 52 int16_t *block, int stride, const uint8_t nnzc[6 * 8]); 56 IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
57 IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
58 IDCT_ADD_REP_FUNC(8, 4, 10, avx)
59 IDCT_ADD_REP_FUNC(, 16, 8, mmx)
60 IDCT_ADD_REP_FUNC(, 16, 8, mmxext)
61 IDCT_ADD_REP_FUNC(, 16, 8, sse2)
62 IDCT_ADD_REP_FUNC(, 16, 10, sse2)
63 IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
64 IDCT_ADD_REP_FUNC(, 16intra, 8, mmxext)
65 IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
66 IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
67 IDCT_ADD_REP_FUNC(, 16, 10, avx)
68 IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
/* Same naming scheme as IDCT_ADD_REP_FUNC, but the declared functions take
 * uint8_t **dst (a pointer to destination pointers) instead of uint8_t *dst.
 * Used below only for the "add8" variants. */
71 #define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ 72 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ 73 (uint8_t **dst, const int *block_offset, \ 74 int16_t *block, int stride, const uint8_t nnzc[6 * 8]); 78 IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
79 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
80 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
91 int bidir,
int edges,
int step,
92 int mask_mv0,
int mask_mv1,
int field);
94 #define LF_FUNC(DIR, TYPE, DEPTH, OPT) \ 95 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ 100 #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \ 101 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ 106 #define LF_FUNCS(type, depth) \ 107 LF_FUNC(h, chroma, depth, mmxext) \ 108 LF_IFUNC(h, chroma_intra, depth, mmxext) \ 109 LF_FUNC(v, chroma, depth, mmxext) \ 110 LF_IFUNC(v, chroma_intra, depth, mmxext) \ 111 LF_FUNC(h, luma, depth, mmxext) \ 112 LF_IFUNC(h, luma_intra, depth, mmxext) \ 113 LF_FUNC(h, luma, depth, sse2) \ 114 LF_IFUNC(h, luma_intra, depth, sse2) \ 115 LF_FUNC(v, luma, depth, sse2) \ 116 LF_IFUNC(v, luma_intra, depth, sse2) \ 117 LF_FUNC(h, chroma, depth, sse2) \ 118 LF_IFUNC(h, chroma_intra, depth, sse2) \ 119 LF_FUNC(v, chroma, depth, sse2) \ 120 LF_IFUNC(v, chroma_intra, depth, sse2) \ 121 LF_FUNC(h, luma, depth, avx) \ 122 LF_IFUNC(h, luma_intra, depth, avx) \ 123 LF_FUNC(v, luma, depth, avx) \ 124 LF_IFUNC(v, luma_intra, depth, avx) \ 125 LF_FUNC(h, chroma, depth, avx) \ 126 LF_IFUNC(h, chroma_intra, depth, avx) \ 127 LF_FUNC(v, chroma, depth, avx) \ 128 LF_IFUNC(v, chroma_intra, depth, avx) 133 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL 136 int beta, int8_t *tc0)
/* Calls the v8 luma filter at pix + 0 (with tc0) and at pix + 8 (with
 * tc0 + 2). Each call is skipped when both of its tc0 entries are negative:
 * the bitwise AND of two signed values is negative only if both have the
 * sign bit set. */
138 if ((tc0[0] & tc0[1]) >= 0)
139 ff_deblock_v8_luma_8_mmxext(pix + 0, stride, alpha, beta, tc0);
140 if ((tc0[2] & tc0[3]) >= 0)
141 ff_deblock_v8_luma_8_mmxext(pix + 8, stride, alpha, beta, tc0 + 2);
/* Intra luma wrapper: calls ff_deblock_v8_luma_intra_8_mmxext() twice, at
 * pix + 0 and at pix + 8, forwarding stride, alpha and beta unchanged.
 * NOTE(review): the remaining parameters and the braces are missing from
 * this excerpt -- confirm against the full file. */
144 static void ff_deblock_v_luma_intra_8_mmxext(
uint8_t *pix,
int stride,
147 ff_deblock_v8_luma_intra_8_mmxext(pix + 0, stride, alpha, beta);
148 ff_deblock_v8_luma_intra_8_mmxext(pix + 8, stride, alpha, beta);
158 #define H264_WEIGHT(W, OPT) \ 159 void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride, \ 160 int height, int log2_denom, \ 161 int weight, int offset); 163 #define H264_BIWEIGHT(W, OPT) \ 164 void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src, \ 165 int stride, int height, \ 166 int log2_denom, int weightd, \ 167 int weights, int offset); 169 #define H264_BIWEIGHT_MMX(W) \ 170 H264_WEIGHT(W, mmxext) \ 171 H264_BIWEIGHT(W, mmxext) 173 #define H264_BIWEIGHT_MMX_SSE(W) \ 174 H264_BIWEIGHT_MMX(W) \ 175 H264_WEIGHT(W, sse2) \ 176 H264_BIWEIGHT(W, sse2) \ 177 H264_BIWEIGHT(W, ssse3) 183 #define H264_WEIGHT_10(W, DEPTH, OPT) \ 184 void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ 191 #define H264_BIWEIGHT_10(W, DEPTH, OPT) \ 192 void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ 201 #define H264_BIWEIGHT_10_SSE(W, DEPTH) \ 202 H264_WEIGHT_10(W, DEPTH, sse2) \ 203 H264_WEIGHT_10(W, DEPTH, sse4) \ 204 H264_BIWEIGHT_10(W, DEPTH, sse2) \ 205 H264_BIWEIGHT_10(W, DEPTH, sse4) 212 const
int chroma_format_idc)
/* Fills the function pointers of the context c with the mmx / mmxext / sse2 /
 * ssse3 / sse4 / avx routines, split by bit_depth (8 or 10).
 * NOTE(review): the CPU-capability checks that presumably guard each group
 * of assignments, and several closing braces / #endif lines, are not visible
 * in this excerpt; several groups assign the same pointers again. */
220 if (bit_depth == 8) {
/* 8-bit, *_mmx routines. The chained assignments give each dc_add pointer
 * the same function as the corresponding full add pointer. */
222 c->h264_idct_dc_add =
223 c->h264_idct_add = ff_h264_idct_add_8_mmx;
224 c->h264_idct8_dc_add =
225 c->h264_idct8_add = ff_h264_idct8_add_8_mmx;
227 c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
228 c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
/* h264_idct_add8 is assigned only when chroma_format_idc == 1. */
229 if (chroma_format_idc == 1)
230 c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
231 c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
/* 8-bit, *_mmxext routines: dedicated dc_add functions plus the add16 /
 * add4 / add8 / add16intra variants. */
236 c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmxext;
237 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
238 c->h264_idct_add16 = ff_h264_idct_add16_8_mmxext;
239 c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmxext;
240 if (chroma_format_idc == 1)
241 c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext;
242 c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext;
/* Chroma loop filters; the horizontal ones sit inside the
 * chroma_format_idc == 1 branch. */
244 c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmxext;
245 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext;
246 if (chroma_format_idc == 1) {
247 c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext;
248 c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
/* mmxext luma loop filters, behind ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL.
 * NOTE(review): the extent of this #if is not visible here. */
250 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL 251 c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmxext;
252 c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmxext;
253 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
254 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
/* Weight / biweight tables: indices 0, 1, 2 receive the _16, _8 and _4
 * functions respectively. */
256 c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext;
257 c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext;
258 c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;
260 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext;
261 c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
262 c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;
/* 8-bit, *_sse2 routines. */
265 c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
267 c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
268 c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
269 if (chroma_format_idc == 1)
270 c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
271 c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
/* Only table indices 0 and 1 (the _16 and _8 functions) are assigned sse2
 * weight / biweight routines; index 2 is not touched here. */
274 c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
275 c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
277 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
278 c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
280 c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
281 c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
282 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
283 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
/* 8-bit, *_ssse3 routines: biweight only, indices 0 and 1. */
286 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
287 c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
/* 8-bit, *_avx routines: luma loop filters only. */
290 c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
291 c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
292 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
293 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
297 }
else if (bit_depth == 10) {
/* 10-bit, *_mmxext routines: vertical chroma filters, luma filters and
 * idct_dc_add. */
301 c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext;
302 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
303 c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext;
304 c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext;
305 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
306 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
308 c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
/* 10-bit, *_sse2 routines. */
310 c->h264_idct_add = ff_h264_idct_add_10_sse2;
311 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
313 c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
314 if (chroma_format_idc == 1)
315 c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
316 c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
/* Assignments behind HAVE_ALIGNED_STACK.
 * NOTE(review): the matching #endif is not visible, so the exact extent of
 * the conditional region must be confirmed against the full file. */
317 #if HAVE_ALIGNED_STACK 318 c->h264_idct8_add = ff_h264_idct8_add_10_sse2;
319 c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
/* All three weight / biweight table entries get 10-bit sse2 routines. */
322 c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
323 c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
324 c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
326 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
327 c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
328 c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
330 c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2;
331 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
/* sse2 luma loop filters, again behind HAVE_ALIGNED_STACK (extent of the
 * #if not visible here). */
332 #if HAVE_ALIGNED_STACK 333 c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
334 c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
335 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
336 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
/* 10-bit, *_sse4 routines: weight / biweight tables only. */
340 c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
341 c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
342 c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
344 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
345 c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
346 c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
/* 10-bit, *_avx routines. The chained assignment sets both idct_dc_add and
 * idct_add to ff_h264_idct_add_10_avx. */
349 c->h264_idct_dc_add =
350 c->h264_idct_add = ff_h264_idct_add_10_avx;
351 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
353 c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
354 if (chroma_format_idc == 1)
355 c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
356 c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
/* avx idct8 routines behind HAVE_ALIGNED_STACK (extent of the #if not
 * visible here). */
357 #if HAVE_ALIGNED_STACK 358 c->h264_idct8_add = ff_h264_idct8_add_10_avx;
359 c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
362 c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx;
363 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
/* avx luma loop filters behind HAVE_ALIGNED_STACK (extent of the #if not
 * visible here). */
364 #if HAVE_ALIGNED_STACK 365 c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
366 c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
367 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
368 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#define LF_FUNCS(type, depth)
#define EXTERNAL_MMX(flags)
FIXME Range Coding of cr are ref
void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul)
#define LF_FUNC(DIR, TYPE, DEPTH, OPT)
#define AV_CPU_FLAG_CMOV
supports cmov instruction
Macro definitions for various function/variable attributes.
#define EXTERNAL_SSE4(flags)
#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)
void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)
#define EXTERNAL_SSE2(flags)
static double alpha(void *priv, double x, double y)
void ff_h264_loop_filter_strength_mmxext(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field)
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame This method is called when a frame is wanted on an output For an input
#define H264_BIWEIGHT_MMX_SSE(W)
#define H264_BIWEIGHT_10_SSE(W, DEPTH)
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Context for storing H.264 DSP functions.
#define LF_IFUNC(DIR, TYPE, DEPTH, OPT)
static const int8_t mv[256][2]
#define EXTERNAL_SSSE3(flags)
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define EXTERNAL_MMXEXT(flags)
these buffered frames must be flushed immediately if a new input produces new output(Example:frame rate-doubling filter:filter_frame must(1) flush the second copy of the previous frame, if it is still there,(2) push the first copy of the incoming frame,(3) keep the second copy for later.) If the input frame is not enough to produce output
#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)
#define H264_BIWEIGHT_MMX(W)
#define EXTERNAL_AVX(flags)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step