x86/swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/asm.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavutil/cpu.h"
31 #include "libavutil/pixdesc.h"
32 
33 #if HAVE_INLINE_ASM
34 
/* Defined without a value; presumably tested by the swscale_template.c code
 * that is included further down in this file -- TODO confirm. */
35 #define DITHER1XBPP
36
/* Per-byte masks: 0xF8 keeps the top 5 bits of every byte, 0xFC the top 6. */
37 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
38 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
/* Four 16-bit lanes holding 0x0010 and 0x0002 respectively. */
39 DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
40 DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
41
/* Two-entry dither tables. updateMMXDitherTables() in this file selects an
 * entry by output line parity (dstY & 1, or (dstY + 1) & 1 for red). */
42 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
43  0x0103010301030103LL,
44  0x0200020002000200LL,};
45
46 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
47  0x0602060206020602LL,
48  0x0004000400040004LL,};
49
/* Bit-field masks replicated over four 16-bit lanes.
 * "16" set: bits 0-4, 5-10 and 11-15 (a 5-6-5 split).
 * "15" set: bits 0-4, 5-9 and 10-14 (a 5-5-5 split). */
50 DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
51 DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
52 DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
53 DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
54 DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
55 DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
56
/* Byte-select masks with 0xFF in every third byte, at three different phases.
 * Presumably used when packing/unpacking 24-bit pixels -- TODO confirm. */
57 DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
58 DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
59 DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
60
/* Each value packs three non-zero 16-bit words plus a zero top word.
 * NOTE(review): by their names these look like fixed-point BGR -> Y/U/V
 * conversion weights; the users are not visible in this file. */
61 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
62 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
63 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
64
/* 0x10 in every byte, 0x80 in every byte, and 1 in every 16-bit lane. */
65 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
66 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
67 DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
68 
69 
/* swscale_template.c is included once per instruction-set flavour. Before each
 * inclusion RENAME() is redefined so that every name wrapped in RENAME() gets a
 * distinct suffix (_MMX or _MMXEXT), and COMPILE_TEMPLATE_MMXEXT tells the
 * template which flavour is being built. The suffixed names used later in this
 * file (sws_init_swScale_MMX, sws_init_swScale_MMXEXT, yuv2yuvX_MMXEXT) are
 * presumably produced by these inclusions. */
70 //MMX versions
71 #if HAVE_MMX_INLINE
72 #undef RENAME
73 #define COMPILE_TEMPLATE_MMXEXT 0
74 #define RENAME(a) a ## _MMX
75 #include "swscale_template.c"
76 #endif
77
78 // MMXEXT versions
79 #if HAVE_MMXEXT_INLINE
80 #undef RENAME
81 #undef COMPILE_TEMPLATE_MMXEXT
82 #define COMPILE_TEMPLATE_MMXEXT 1
83 #define RENAME(a) a ## _MMXEXT
84 #include "swscale_template.c"
85 #endif
86 
/*
 * updateMMXDitherTables(): per output line dstY, pick the dither constants and
 * refill the lumMmxFilter / chrMmxFilter / alpMmxFilter tables with source-line
 * pointers and vertical filter coefficients.
 *
 * NOTE(review): this listing is incomplete. The first line of the signature is
 * not visible (the index at the end of the page gives it as
 * "void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex,
 * int chrBufIndex, int lastInLumBuf, int lastInChrBuf)"), and the listing
 * numbers jump 96->99, 100->104 and 110->112. The declarations of
 * vLumFilterPos, vChrFilterPos, lumMmxFilter, chrMmxFilter and alpMmxFilter and
 * the condition that owns the "else" below are therefore not shown here;
 * confirm against the upstream file.
 */
88  int lastInLumBuf, int lastInChrBuf)
89 {
90  const int dstH= c->dstH;
91  const int flags= c->flags;
92  int16_t **lumPixBuf= c->lumPixBuf;
93  int16_t **chrUPixBuf= c->chrUPixBuf;
94  int16_t **alpPixBuf= c->alpPixBuf;
95  const int vLumBufSize= c->vLumBufSize;
96  const int vChrBufSize= c->vChrBufSize;
99  int16_t *vLumFilter= c->vLumFilter;
100  int16_t *vChrFilter= c->vChrFilter;
104  const int vLumFilterSize= c->vLumFilterSize;
105  const int vChrFilterSize= c->vChrFilterSize;
/* Chroma output line that corresponds to luma output line dstY. */
106  const int chrDstY= dstY>>c->chrDstVSubSample;
107  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
108  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
109
/* Dither constants alternate with line parity; red uses the opposite parity
 * from blue. Green takes either the 8- or the 4-step table (the selecting
 * condition is on a line that is not visible here). */
110  c->blueDither= ff_dither8[dstY&1];
112  c->greenDither= ff_dither8[dstY&1];
113  else
114  c->greenDither= ff_dither4[dstY&1];
115  c->redDither= ff_dither8[(dstY+1)&1];
/* The filter tables are only rebuilt for lines before the last two. */
116  if (dstY < dstH - 2) {
/* Position of the first needed source line inside each line-pointer array. */
117  const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
118  const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
119  const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
120  int i;
121
/* Luma filter window reaches above line 0 or below the last source line:
 * build a clamped pointer list in the area starting at
 * lumPixBuf + 2 * vLumBufSize. Entries for lines above the picture repeat the
 * pointer of source line 0 (lumSrcPtr[neg]); entries past "end" repeat the
 * last valid one. The same is done for alpha when it is present. */
122  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
123  const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
/* Note: this "i" shadows the one declared at the top of the enclosing block. */
124  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
125  for (i = 0; i < neg; i++)
126  tmpY[i] = lumSrcPtr[neg];
127  for ( ; i < end; i++)
128  tmpY[i] = lumSrcPtr[i];
129  for ( ; i < vLumFilterSize; i++)
130  tmpY[i] = tmpY[i-1];
131  lumSrcPtr = tmpY;
132
133  if (alpSrcPtr) {
134  const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
135  for (i = 0; i < neg; i++)
136  tmpA[i] = alpSrcPtr[neg];
137  for ( ; i < end; i++)
138  tmpA[i] = alpSrcPtr[i];
139  for ( ; i < vLumFilterSize; i++)
140  tmpA[i] = tmpA[i - 1];
141  alpSrcPtr = tmpA;
142  }
143  }
/* Same clamping for the chroma window, using chrSrcH as the limit. */
144  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
145  const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
146  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
147  for (i = 0; i < neg; i++) {
148  tmpU[i] = chrUSrcPtr[neg];
149  }
150  for ( ; i < end; i++) {
151  tmpU[i] = chrUSrcPtr[i];
152  }
153  for ( ; i < vChrFilterSize; i++) {
154  tmpU[i] = tmpU[i - 1];
155  }
156  chrUSrcPtr = tmpU;
157  }
158
/* SWS_ACCURATE_RND layout: taps are stored in pairs. Each pair holds two line
 * pointers (the second at APCK_PTR2) and, at APCK_COEF, the two coefficients
 * packed as low 16 bits | high 16 bits, written to two consecutive int32
 * slots. With a single-tap filter the second pointer repeats the first and
 * the high coefficient is 0. */
159  if (flags & SWS_ACCURATE_RND) {
160  int s= APCK_SIZE / 8;
161  for (i=0; i<vLumFilterSize; i+=2) {
162  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
163  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
164  lumMmxFilter[s*i+APCK_COEF/4 ]=
165  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
166  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
167  if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
168  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
169  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
/* Alpha reuses the packed luma coefficients. */
170  alpMmxFilter[s*i+APCK_COEF/4 ]=
171  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
172  }
173  }
174  for (i=0; i<vChrFilterSize; i+=2) {
175  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
176  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
177  chrMmxFilter[s*i+APCK_COEF/4 ]=
178  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
179  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
180  }
/* Default layout: four int32 slots per tap. The line pointer is written
 * starting at slot 0; slots 2 and 3 each hold the 16-bit coefficient
 * duplicated into both halves (multiplication by 0x10001). */
181  } else {
182  for (i=0; i<vLumFilterSize; i++) {
183  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
184  lumMmxFilter[4*i+2]=
185  lumMmxFilter[4*i+3]=
186  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
187  if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
188  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
189  alpMmxFilter[4*i+2]=
190  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
191  }
192  }
193  for (i=0; i<vChrFilterSize; i++) {
194  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
195  chrMmxFilter[4*i+2]=
196  chrMmxFilter[4*i+3]=
197  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
198  }
199  }
200  }
201 }
202 
203 #if HAVE_MMXEXT
204 static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
205  const int16_t **src, uint8_t *dest, int dstW,
206  const uint8_t *dither, int offset)
207 {
208  if(((int)dest) & 15){
209  return yuv2yuvX_MMXEXT(filter, filterSize, src, dest, dstW, dither, offset);
210  }
211  if (offset) {
212  __asm__ volatile("movq (%0), %%xmm3\n\t"
213  "movdqa %%xmm3, %%xmm4\n\t"
214  "psrlq $24, %%xmm3\n\t"
215  "psllq $40, %%xmm4\n\t"
216  "por %%xmm4, %%xmm3\n\t"
217  :: "r"(dither)
218  );
219  } else {
220  __asm__ volatile("movq (%0), %%xmm3\n\t"
221  :: "r"(dither)
222  );
223  }
224  filterSize--;
225  __asm__ volatile(
226  "pxor %%xmm0, %%xmm0\n\t"
227  "punpcklbw %%xmm0, %%xmm3\n\t"
228  "movd %0, %%xmm1\n\t"
229  "punpcklwd %%xmm1, %%xmm1\n\t"
230  "punpckldq %%xmm1, %%xmm1\n\t"
231  "punpcklqdq %%xmm1, %%xmm1\n\t"
232  "psllw $3, %%xmm1\n\t"
233  "paddw %%xmm1, %%xmm3\n\t"
234  "psraw $4, %%xmm3\n\t"
235  ::"m"(filterSize)
236  );
237  __asm__ volatile(
238  "movdqa %%xmm3, %%xmm4\n\t"
239  "movdqa %%xmm3, %%xmm7\n\t"
240  "movl %3, %%ecx\n\t"
241  "mov %0, %%"REG_d" \n\t"\
242  "mov (%%"REG_d"), %%"REG_S" \n\t"\
243  ".p2align 4 \n\t" /* FIXME Unroll? */\
244  "1: \n\t"\
245  "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\
246  "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\
247  "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\
248  "add $16, %%"REG_d" \n\t"\
249  "mov (%%"REG_d"), %%"REG_S" \n\t"\
250  "test %%"REG_S", %%"REG_S" \n\t"\
251  "pmulhw %%xmm0, %%xmm2 \n\t"\
252  "pmulhw %%xmm0, %%xmm5 \n\t"\
253  "paddw %%xmm2, %%xmm3 \n\t"\
254  "paddw %%xmm5, %%xmm4 \n\t"\
255  " jnz 1b \n\t"\
256  "psraw $3, %%xmm3 \n\t"\
257  "psraw $3, %%xmm4 \n\t"\
258  "packuswb %%xmm4, %%xmm3 \n\t"
259  "movntdq %%xmm3, (%1, %%"REG_c")\n\t"
260  "add $16, %%"REG_c" \n\t"\
261  "cmp %2, %%"REG_c" \n\t"\
262  "movdqa %%xmm7, %%xmm3\n\t"
263  "movdqa %%xmm7, %%xmm4\n\t"
264  "mov %0, %%"REG_d" \n\t"\
265  "mov (%%"REG_d"), %%"REG_S" \n\t"\
266  "jb 1b \n\t"\
267  :: "g" (filter),
268  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
269  : "%"REG_d, "%"REG_S, "%"REG_c
270  );
271 }
272 #endif
273 
274 #endif /* HAVE_INLINE_ASM */
275 
276 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
277 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
278  SwsContext *c, int16_t *data, \
279  int dstW, const uint8_t *src, \
280  const int16_t *filter, \
281  const int32_t *filterPos, int filterSize)
282 
283 #define SCALE_FUNCS(filter_n, opt) \
284  SCALE_FUNC(filter_n, 8, 15, opt); \
285  SCALE_FUNC(filter_n, 9, 15, opt); \
286  SCALE_FUNC(filter_n, 10, 15, opt); \
287  SCALE_FUNC(filter_n, 12, 15, opt); \
288  SCALE_FUNC(filter_n, 14, 15, opt); \
289  SCALE_FUNC(filter_n, 16, 15, opt); \
290  SCALE_FUNC(filter_n, 8, 19, opt); \
291  SCALE_FUNC(filter_n, 9, 19, opt); \
292  SCALE_FUNC(filter_n, 10, 19, opt); \
293  SCALE_FUNC(filter_n, 12, 19, opt); \
294  SCALE_FUNC(filter_n, 14, 19, opt); \
295  SCALE_FUNC(filter_n, 16, 19, opt)
296 
297 #define SCALE_FUNCS_MMX(opt) \
298  SCALE_FUNCS(4, opt); \
299  SCALE_FUNCS(8, opt); \
300  SCALE_FUNCS(X, opt)
301 
302 #define SCALE_FUNCS_SSE(opt) \
303  SCALE_FUNCS(4, opt); \
304  SCALE_FUNCS(8, opt); \
305  SCALE_FUNCS(X4, opt); \
306  SCALE_FUNCS(X8, opt)
307 
308 #if ARCH_X86_32
309 SCALE_FUNCS_MMX(mmx);
310 #endif
311 SCALE_FUNCS_SSE(sse2);
312 SCALE_FUNCS_SSE(ssse3);
313 SCALE_FUNCS_SSE(sse4);
314 
/*
 * Prototypes for the multi-tap vertical scalers ff_yuv2planeX_<depth>_<isa>().
 * Only declarations are produced here; the definitions live outside this file.
 */
#define VSCALEX_FUNC(depth, isa)                                             \
void ff_yuv2planeX_ ## depth ## _ ## isa(                                    \
    const int16_t *filter, int filterSize,                                   \
    const int16_t **src, uint8_t *dest, int dstW,                            \
    const uint8_t *dither, int offset)

/* The 8-, 9- and 10-bit variants of one instruction set. */
#define VSCALEX_FUNCS(isa)                                                   \
    VSCALEX_FUNC(8,  isa);                                                   \
    VSCALEX_FUNC(9,  isa);                                                   \
    VSCALEX_FUNC(10, isa)

#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNCS(avx);
/* A 16-bit variant is declared for SSE4 only. */
VSCALEX_FUNC(16, sse4);
331 
/*
 * Prototypes for the single-line vertical output functions
 * ff_yuv2plane1_<depth>_<isa>(). Only declarations are produced here; the
 * definitions live outside this file.
 */
#define VSCALE_FUNC(depth, isa)                                              \
void ff_yuv2plane1_ ## depth ## _ ## isa(                                    \
    const int16_t *src, uint8_t *dst, int dstW,                              \
    const uint8_t *dither, int offset)

/* 8- and 16-bit variants use isa_a, the 9- and 10-bit variants use isa_b. */
#define VSCALE_FUNCS(isa_a, isa_b)                                           \
    VSCALE_FUNC(8,  isa_a);                                                  \
    VSCALE_FUNC(16, isa_a);                                                  \
    VSCALE_FUNC(9,  isa_b);                                                  \
    VSCALE_FUNC(10, isa_b)

#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNCS(avx, avx);
/* SSE4 only provides the 16-bit variant. */
VSCALE_FUNC(16, sse4);
347 
/*
 * Prototypes for the input converters ff_<fmt>ToY_<isa>() and
 * ff_<fmt>ToUV_<isa>(). Only declarations are produced here; the definitions
 * live outside this file.
 */

/* Luma converter: one destination line, one used source pointer. */
#define INPUT_Y_FUNC(pixfmt, isa)                                            \
void ff_ ## pixfmt ## ToY_ ## isa(                                           \
    uint8_t *dst, const uint8_t *src,                                        \
    const uint8_t *unused1, const uint8_t *unused2,                          \
    int w, uint32_t *unused)

/* Chroma converter: two destination lines, two used source pointers. */
#define INPUT_UV_FUNC(pixfmt, isa)                                           \
void ff_ ## pixfmt ## ToUV_ ## isa(                                          \
    uint8_t *dstU, uint8_t *dstV,                                            \
    const uint8_t *unused0,                                                  \
    const uint8_t *src1,                                                     \
    const uint8_t *src2,                                                     \
    int w, uint32_t *unused)

/* Luma and chroma converter for one format. */
#define INPUT_FUNC(pixfmt, isa)                                              \
    INPUT_Y_FUNC(pixfmt, isa);                                               \
    INPUT_UV_FUNC(pixfmt, isa)

/* Everything declared per instruction set; nv12 and nv21 get a chroma
 * converter only. */
#define INPUT_FUNCS(isa)                                                     \
    INPUT_FUNC(uyvy, isa);                                                   \
    INPUT_FUNC(yuyv, isa);                                                   \
    INPUT_UV_FUNC(nv12, isa);                                                \
    INPUT_UV_FUNC(nv21, isa);                                                \
    INPUT_FUNC(rgb24, isa);                                                  \
    INPUT_FUNC(bgr24, isa);                                                  \
    INPUT_FUNC(rgba, isa);                                                   \
    INPUT_FUNC(bgra, isa);                                                   \
    INPUT_FUNC(argb, isa);                                                   \
    INPUT_FUNC(abgr, isa)

#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
379 
/*
 * Body of ff_sws_init_swScale_mmx(): installs x86-specific function pointers
 * in the SwsContext according to the CPU flags reported at run time. Later
 * "if" blocks overwrite the choices of earlier ones, so the last block whose
 * CPU test passes wins for each pointer it assigns.
 *
 * NOTE(review): this listing is incomplete. The signature line is not visible
 * (the index at the end of the page gives it as
 * "av_cold void ff_sws_init_swScale_mmx(SwsContext *c)"), and parts of the
 * ASSIGN_VSCALEX_FUNC calls in the SSE2, SSE4 and AVX sections are missing
 * (listing numbers jump 497->499, 544->546->548 and 552->555). Confirm against
 * the upstream file before editing those sections.
 */
381 {
382  int cpu_flags = av_get_cpu_flags();
383
/* Inline-assembly implementations: the init functions come from the
 * swscale_template.c instantiations. On SSE3 CPUs yuv2yuvX_sse3 replaces
 * yuv2planeX, but only when use_mmx_vfilter is set and SWS_ACCURATE_RND was
 * not requested. */
384 #if HAVE_INLINE_ASM
385  if (cpu_flags & AV_CPU_FLAG_MMX)
386  sws_init_swScale_MMX(c);
387 #if HAVE_MMXEXT_INLINE
388  if (cpu_flags & AV_CPU_FLAG_MMXEXT)
389  sws_init_swScale_MMXEXT(c);
390  if (cpu_flags & AV_CPU_FLAG_SSE3){
391  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
392  c->yuv2planeX = yuv2yuvX_sse3;
393  }
394 #endif
395 #endif /* HAVE_INLINE_ASM */
396
/* Pick the horizontal scaler for one filter size from srcBpc and dstBpc.
 * dstBpc <= 14 selects the ...to15 variant (built with opt2), anything else
 * the ...to19 variant (built with opt1). The "14" branch is also taken for
 * PAL8 or RGB sources whose first component has depth_minus1 < 15; the final
 * branch asserts that srcBpc is 16. */
397 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
398  if (c->srcBpc == 8) { \
399  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
400  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
401  } else if (c->srcBpc == 9) { \
402  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
403  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
404  } else if (c->srcBpc == 10) { \
405  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
406  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
407  } else if (c->srcBpc == 12) { \
408  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
409  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
410  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \
411  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
412  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
413  } else { /* c->srcBpc == 16 */ \
414  av_assert0(c->srcBpc == 16);\
415  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
416  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
417  } \
418 } while (0)
/* MMX dispatch on filter size: 4, 8, otherwise the generic "X" version. */
419 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
420  switch (filtersize) { \
421  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
422  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
423  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
424  }
/* Multi-tap vertical scaler by dstBpc: 16 runs the caller-supplied statement,
 * 10 and 9 are assigned for non-big-endian destinations only. In the default
 * case the assignment is commented out, so condition_8bit only guards a
 * "break" and no function is installed. */
425 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
426 switch(c->dstBpc){ \
427  case 16: do_16_case; break; \
428  case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
429  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
430  default: if (condition_8bit) /*vscalefn = ff_yuv2planeX_8_ ## opt;*/ break; \
431  }
/* Single-line vertical output by dstBpc: 16 (non-big-endian only) and 8 use
 * opt1; 10 and 9 use opt2 and additionally require opt2chk and a
 * non-big-endian destination. Any other depth must be greater than 8. */
432 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
433  switch(c->dstBpc){ \
434  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
435  case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
436  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
437  case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
438  default: av_assert0(c->dstBpc>8); \
439  }
/* switch case for one RGB source format: the luma converter is always set,
 * the chroma converter only when chrSrcHSubSample is zero. */
440 #define case_rgb(x, X, opt) \
441  case AV_PIX_FMT_ ## X: \
442  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
443  if (!c->chrSrcHSubSample) \
444  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
445  break
/* MMX / MMXEXT external functions are only considered on 32-bit x86. */
446 #if ARCH_X86_32
447  if (EXTERNAL_MMX(cpu_flags)) {
448  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
449  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
450  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
451
452  switch (c->srcFormat) {
/* Y400A reuses the packed-YUV luma extractors: yuyvToY for luma and uyvyToY
 * for alpha. NOTE(review): presumably because gray and alpha bytes are
 * interleaved in this format -- confirm. */
453  case AV_PIX_FMT_Y400A:
454  c->lumToYV12 = ff_yuyvToY_mmx;
455  if (c->alpPixBuf)
456  c->alpToYV12 = ff_uyvyToY_mmx;
457  break;
458  case AV_PIX_FMT_YUYV422:
459  c->lumToYV12 = ff_yuyvToY_mmx;
460  c->chrToYV12 = ff_yuyvToUV_mmx;
461  break;
462  case AV_PIX_FMT_UYVY422:
463  c->lumToYV12 = ff_uyvyToY_mmx;
464  c->chrToYV12 = ff_uyvyToUV_mmx;
465  break;
466  case AV_PIX_FMT_NV12:
467  c->chrToYV12 = ff_nv12ToUV_mmx;
468  break;
469  case AV_PIX_FMT_NV21:
470  c->chrToYV12 = ff_nv21ToUV_mmx;
471  break;
472  case_rgb(rgb24, RGB24, mmx);
473  case_rgb(bgr24, BGR24, mmx);
474  case_rgb(bgra, BGRA, mmx);
475  case_rgb(rgba, RGBA, mmx);
476  case_rgb(abgr, ABGR, mmx);
477  case_rgb(argb, ARGB, mmx);
478  default:
479  break;
480  }
481  }
482  if (EXTERNAL_MMXEXT(cpu_flags)) {
483  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
484  }
485 #endif /* ARCH_X86_32 */
/* SSE dispatch on filter size: 4, 8, otherwise X4 when bit 2 of the size is
 * set and X8 when it is not. */
486 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
487  switch (filtersize) { \
488  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
489  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
490  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
491  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
492  break; \
493  }
494  if (EXTERNAL_SSE2(cpu_flags)) {
495  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
496  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
/* NOTE(review): the final argument and closing parenthesis of the next call
 * are on a line that is not visible in this listing. */
497  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
499  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
500
501  switch (c->srcFormat) {
502  case AV_PIX_FMT_Y400A:
503  c->lumToYV12 = ff_yuyvToY_sse2;
504  if (c->alpPixBuf)
505  c->alpToYV12 = ff_uyvyToY_sse2;
506  break;
507  case AV_PIX_FMT_YUYV422:
508  c->lumToYV12 = ff_yuyvToY_sse2;
509  c->chrToYV12 = ff_yuyvToUV_sse2;
510  break;
511  case AV_PIX_FMT_UYVY422:
512  c->lumToYV12 = ff_uyvyToY_sse2;
513  c->chrToYV12 = ff_uyvyToUV_sse2;
514  break;
515  case AV_PIX_FMT_NV12:
516  c->chrToYV12 = ff_nv12ToUV_sse2;
517  break;
518  case AV_PIX_FMT_NV21:
519  c->chrToYV12 = ff_nv21ToUV_sse2;
520  break;
521  case_rgb(rgb24, RGB24, sse2);
522  case_rgb(bgr24, BGR24, sse2);
523  case_rgb(bgra, BGRA, sse2);
524  case_rgb(rgba, RGBA, sse2);
525  case_rgb(abgr, ABGR, sse2);
526  case_rgb(argb, ARGB, sse2);
527  default:
528  break;
529  }
530  }
/* SSSE3 replaces the horizontal scalers and the 24-bit RGB input converters. */
531  if (EXTERNAL_SSSE3(cpu_flags)) {
532  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
533  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
534  switch (c->srcFormat) {
535  case_rgb(rgb24, RGB24, ssse3);
536  case_rgb(bgr24, BGR24, ssse3);
537  default:
538  break;
539  }
540  }
541  if (EXTERNAL_SSE4(cpu_flags)) {
542  /* Xto15 don't need special sse4 functions */
543  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
544  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
/* NOTE(review): the next line ends in a comma; it looks like the do_16_case
 * argument of an ASSIGN_VSCALEX_FUNC call whose first and last lines are not
 * visible in this listing. */
546  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
548  if (c->dstBpc == 16 && !isBE(c->dstFormat))
549  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
550  }
551
/* NOTE(review): listing numbers 553-554 are not visible; a yuv2planeX
 * assignment for AVX may be missing from this view. */
552  if (EXTERNAL_AVX(cpu_flags)) {
555  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
556
/* For the packed/semi-planar YUV formats only the chroma converter is
 * replaced here; the RGB cases go through case_rgb as above. */
557  switch (c->srcFormat) {
558  case AV_PIX_FMT_YUYV422:
559  c->chrToYV12 = ff_yuyvToUV_avx;
560  break;
561  case AV_PIX_FMT_UYVY422:
562  c->chrToYV12 = ff_uyvyToUV_avx;
563  break;
564  case AV_PIX_FMT_NV12:
565  c->chrToYV12 = ff_nv12ToUV_avx;
566  break;
567  case AV_PIX_FMT_NV21:
568  c->chrToYV12 = ff_nv21ToUV_avx;
569  break;
570  case_rgb(rgb24, RGB24, avx);
571  case_rgb(bgr24, BGR24, avx);
572  case_rgb(bgra, BGRA, avx);
573  case_rgb(rgba, RGBA, avx);
574  case_rgb(abgr, ABGR, avx);
575  case_rgb(argb, ARGB, avx);
576  default:
577  break;
578  }
579  }
580 }
#define EXTERNAL_MMX(flags)
Definition: x86/cpu.h:33
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:85
int16_t ** alpPixBuf
Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
const uint64_t ff_dither8[2]
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
const char * s
Definition: avisynth_c.h:668
int chrSrcH
Height of source chroma planes.
#define VSCALE_FUNC(size, opt)
Definition: x86/swscale.c:332
#define SCALE_FUNCS_MMX(opt)
Definition: x86/swscale.c:297
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
#define ARCH_X86_64
Definition: config.h:37
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:59
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) line of output data.
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:153
int dstY
Last destination vertical line output from last slice.
#define case_rgb(x, X, opt)
Macro definitions for various function/variable attributes.
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
initialize output if(nPeaks >3)%at least 3 peaks in spectrum for trying to find f0 nf0peaks
int srcH
Height of source luma/alpha planes.
#define VSCALE_FUNCS(opt1, opt2)
Definition: x86/swscale.c:335
#define EXTERNAL_SSE4(flags)
Definition: x86/cpu.h:39
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
uint8_t
#define av_cold
Definition: attributes.h:78
int vChrFilterSize
Vertical filter size for chroma pixels.
end end
int16_t ** lumPixBuf
Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:30
the mask is usually to keep the same permissions Filters should remove permissions on reference they give to output whenever necessary It can be automatically done by setting the rej_perms field on the output pad Here are a few guidelines corresponding to common then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
external API header
enum AVPixelFormat dstFormat
Destination pixel format.
#define EXTERNAL_SSE2(flags)
Definition: x86/cpu.h:36
#define VSCALEX_FUNCS(opt)
Definition: x86/swscale.c:319
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
#define U(x)
const uint64_t ff_dither4[2]
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:93
simple assert() macros that are a bit more flexible than ISO C assert().
int vChrBufSize
Number of vertical chroma lines allocated in the ring buffer.
static const uint8_t offset[127][2]
Definition: vf_spp.c:70
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
as above, but U and V bytes are swapped
Definition: pixfmt.h:94
#define APCK_SIZE
#define FFMIN(a, b)
Definition: common.h:58
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
Definition: cpu.h:37
yuv2planar1_fn yuv2plane1
int vLumBufSize
Number of vertical luma/alpha lines allocated in the ring buffer.
#define SCALE_FUNCS_SSE(opt)
Definition: x86/swscale.c:302
int16_t ** chrUPixBuf
Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int32_t
#define SWS_ACCURATE_RND
Definition: swscale.h:83
static int cpu_flags
Definition: dct-test.c:77
int dstW
Width of destination luma/alpha planes.
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:273
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
NULL
Definition: eval.c:55
dest
Definition: start.py:60
AVS_Value src
Definition: avisynth_c.h:523
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
yuv2planarX_fn yuv2planeX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:69
#define APCK_COEF
#define EXTERNAL_SSSE3(flags)
Definition: x86/cpu.h:38
synthesis window for stochastic i
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:30
#define INPUT_FUNCS(opt)
Definition: x86/swscale.c:361
#define AV_PIX_FMT_Y400A
Definition: pixfmt.h:250
static int flags
Definition: cpu.c:23
#define EXTERNAL_MMXEXT(flags)
Definition: x86/cpu.h:34
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
#define VSCALEX_FUNC(size, opt)
Definition: x86/swscale.c:315
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
#define CONFIG_SWSCALE_ALPHA
Definition: config.h:394
static double c[64]
enum AVPixelFormat srcFormat
Source pixel format.
#define HAVE_ALIGNED_STACK
Definition: config.h:152
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:269
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
int x86_reg
DECLARE_ASM_CONST(8, int, deringThreshold)
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
Definition: x86/swscale.c:380
#define APCK_PTR2
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
#define EXTERNAL_AVX(flags)
Definition: x86/cpu.h:41
#define av_unused
Definition: attributes.h:114