44 0x0200020002000200LL,};
48 0x0004000400040004LL,};
61 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
62 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
63 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
65 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
66 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
73 #define COMPILE_TEMPLATE_MMXEXT 0 74 #define RENAME(a) a ## _MMX 79 #if HAVE_MMXEXT_INLINE 81 #undef COMPILE_TEMPLATE_MMXEXT 82 #define COMPILE_TEMPLATE_MMXEXT 1 83 #define RENAME(a) a ## _MMXEXT 107 const int firstLumSrcY= vLumFilterPos[
dstY];
108 const int firstChrSrcY= vChrFilterPos[chrDstY];
116 if (dstY < dstH - 2) {
117 const int16_t **lumSrcPtr= (
const int16_t **)(
void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf +
vLumBufSize;
118 const int16_t **chrUSrcPtr= (
const int16_t **)(
void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf +
vChrBufSize;
119 const int16_t **alpSrcPtr= (
CONFIG_SWSCALE_ALPHA &&
alpPixBuf) ? (
const int16_t **)(
void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize :
NULL;
122 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->
srcH) {
123 const int16_t **tmpY = (
const int16_t **) lumPixBuf + 2 * vLumBufSize;
124 int neg = -firstLumSrcY,
i,
end =
FFMIN(c->
srcH - firstLumSrcY, vLumFilterSize);
125 for (i = 0; i < neg; i++)
126 tmpY[i] = lumSrcPtr[neg];
127 for ( ; i <
end; i++)
128 tmpY[i] = lumSrcPtr[i];
134 const int16_t **tmpA = (
const int16_t **) alpPixBuf + 2 * vLumBufSize;
135 for (i = 0; i < neg; i++)
136 tmpA[i] = alpSrcPtr[neg];
137 for ( ; i <
end; i++)
138 tmpA[i] = alpSrcPtr[i];
140 tmpA[i] = tmpA[i - 1];
144 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->
chrSrcH) {
145 const int16_t **tmpU = (
const int16_t **) chrUPixBuf + 2 * vChrBufSize;
146 int neg = -firstChrSrcY,
i, end =
FFMIN(c->
chrSrcH - firstChrSrcY, vChrFilterSize);
147 for (i = 0; i < neg; i++) {
148 tmpU[
i] = chrUSrcPtr[neg];
150 for ( ; i <
end; i++) {
151 tmpU[
i] = chrUSrcPtr[
i];
154 tmpU[
i] = tmpU[i - 1];
162 *(
const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
163 *(
const void**)&lumMmxFilter[s*i+
APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
165 lumMmxFilter[s*i+
APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize +
i ]
166 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
168 *(
const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
169 *(
const void**)&alpMmxFilter[s*i+
APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
175 *(
const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
176 *(
const void**)&chrMmxFilter[s*i+
APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
178 chrMmxFilter[s*i+
APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize +
i ]
179 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
183 *(
const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
186 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001
U;
188 *(
const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
190 alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
194 *(
const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
197 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001
U;
204 static void yuv2yuvX_sse3(
const int16_t *
filter,
int filterSize,
208 if(((
int)dest) & 15){
209 return yuv2yuvX_MMXEXT(filter, filterSize, src, dest, dstW, dither, offset);
212 __asm__
volatile(
"movq (%0), %%xmm3\n\t" 213 "movdqa %%xmm3, %%xmm4\n\t" 214 "psrlq $24, %%xmm3\n\t" 215 "psllq $40, %%xmm4\n\t" 216 "por %%xmm4, %%xmm3\n\t" 220 __asm__
volatile(
"movq (%0), %%xmm3\n\t" 226 "pxor %%xmm0, %%xmm0\n\t" 227 "punpcklbw %%xmm0, %%xmm3\n\t" 228 "movd %0, %%xmm1\n\t" 229 "punpcklwd %%xmm1, %%xmm1\n\t" 230 "punpckldq %%xmm1, %%xmm1\n\t" 231 "punpcklqdq %%xmm1, %%xmm1\n\t" 232 "psllw $3, %%xmm1\n\t" 233 "paddw %%xmm1, %%xmm3\n\t" 234 "psraw $4, %%xmm3\n\t" 238 "movdqa %%xmm3, %%xmm4\n\t" 239 "movdqa %%xmm3, %%xmm7\n\t" 241 "mov %0, %%"REG_d
" \n\t"\
242 "mov (%%"REG_d
"), %%"REG_S
" \n\t"\
245 "movddup 8(%%"REG_d
"), %%xmm0 \n\t" \
246 "movdqa (%%"REG_S
", %%"REG_c
", 2), %%xmm2 \n\t" \
247 "movdqa 16(%%"REG_S
", %%"REG_c
", 2), %%xmm5 \n\t" \
248 "add $16, %%"REG_d
" \n\t"\
249 "mov (%%"REG_d
"), %%"REG_S
" \n\t"\
250 "test %%"REG_S
", %%"REG_S
" \n\t"\
251 "pmulhw %%xmm0, %%xmm2 \n\t"\
252 "pmulhw %%xmm0, %%xmm5 \n\t"\
253 "paddw %%xmm2, %%xmm3 \n\t"\
254 "paddw %%xmm5, %%xmm4 \n\t"\
256 "psraw $3, %%xmm3 \n\t"\
257 "psraw $3, %%xmm4 \n\t"\
258 "packuswb %%xmm4, %%xmm3 \n\t" 259 "movntdq %%xmm3, (%1, %%"REG_c
")\n\t" 260 "add $16, %%"REG_c
" \n\t"\
261 "cmp %2, %%"REG_c
" \n\t"\
262 "movdqa %%xmm7, %%xmm3\n\t" 263 "movdqa %%xmm7, %%xmm4\n\t" 264 "mov %0, %%"REG_d
" \n\t"\
265 "mov (%%"REG_d
"), %%"REG_S
" \n\t"\
268 "r" (dest-offset),
"g" ((
x86_reg)(dstW+offset)),
"m" (offset)
269 :
"%"REG_d,
"%"REG_S,
"%"REG_c
276 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ 277 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ 278 SwsContext *c, int16_t *data, \ 279 int dstW, const uint8_t *src, \ 280 const int16_t *filter, \ 281 const int32_t *filterPos, int filterSize) 283 #define SCALE_FUNCS(filter_n, opt) \ 284 SCALE_FUNC(filter_n, 8, 15, opt); \ 285 SCALE_FUNC(filter_n, 9, 15, opt); \ 286 SCALE_FUNC(filter_n, 10, 15, opt); \ 287 SCALE_FUNC(filter_n, 12, 15, opt); \ 288 SCALE_FUNC(filter_n, 14, 15, opt); \ 289 SCALE_FUNC(filter_n, 16, 15, opt); \ 290 SCALE_FUNC(filter_n, 8, 19, opt); \ 291 SCALE_FUNC(filter_n, 9, 19, opt); \ 292 SCALE_FUNC(filter_n, 10, 19, opt); \ 293 SCALE_FUNC(filter_n, 12, 19, opt); \ 294 SCALE_FUNC(filter_n, 14, 19, opt); \ 295 SCALE_FUNC(filter_n, 16, 19, opt) 297 #define SCALE_FUNCS_MMX(opt) \ 298 SCALE_FUNCS(4, opt); \ 299 SCALE_FUNCS(8, opt); \ 302 #define SCALE_FUNCS_SSE(opt) \ 303 SCALE_FUNCS(4, opt); \ 304 SCALE_FUNCS(8, opt); \ 305 SCALE_FUNCS(X4, opt); \ 315 #define VSCALEX_FUNC(size, opt) \ 316 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ 317 const int16_t **src, uint8_t *dest, int dstW, \ 318 const uint8_t *dither, int offset) 319 #define VSCALEX_FUNCS(opt) \ 320 VSCALEX_FUNC(8, opt); \ 321 VSCALEX_FUNC(9, opt); \ 322 VSCALEX_FUNC(10, opt) 332 #define VSCALE_FUNC(size, opt) \ 333 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \ 334 const uint8_t *dither, int offset) 335 #define VSCALE_FUNCS(opt1, opt2) \ 336 VSCALE_FUNC(8, opt1); \ 337 VSCALE_FUNC(9, opt2); \ 338 VSCALE_FUNC(10, opt2); \ 339 VSCALE_FUNC(16, opt1) 348 #define INPUT_Y_FUNC(fmt, opt) \ 349 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ 350 const uint8_t *unused1, const uint8_t *unused2, \ 351 int w, uint32_t *unused) 352 #define INPUT_UV_FUNC(fmt, opt) \ 353 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ 354 const uint8_t *unused0, \ 355 const 
uint8_t *src1, \ 356 const uint8_t *src2, \ 357 int w, uint32_t *unused) 358 #define INPUT_FUNC(fmt, opt) \ 359 INPUT_Y_FUNC(fmt, opt); \ 360 INPUT_UV_FUNC(fmt, opt) 361 #define INPUT_FUNCS(opt) \ 362 INPUT_FUNC(uyvy, opt); \ 363 INPUT_FUNC(yuyv, opt); \ 364 INPUT_UV_FUNC(nv12, opt); \ 365 INPUT_UV_FUNC(nv21, opt); \ 366 INPUT_FUNC(rgba, opt); \ 367 INPUT_FUNC(bgra, opt); \ 368 INPUT_FUNC(argb, opt); \ 369 INPUT_FUNC(abgr, opt); \ 370 INPUT_FUNC(rgb24, opt); \ 371 INPUT_FUNC(bgr24, opt) 386 sws_init_swScale_MMX(c);
387 #if HAVE_MMXEXT_INLINE 389 sws_init_swScale_MMXEXT(c);
397 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ 398 if (c->srcBpc == 8) { \ 399 hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ 400 ff_hscale8to19_ ## filtersize ## _ ## opt1; \ 401 } else if (c->srcBpc == 9) { \ 402 hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ 403 ff_hscale9to19_ ## filtersize ## _ ## opt1; \ 404 } else if (c->srcBpc == 10) { \ 405 hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ 406 ff_hscale10to19_ ## filtersize ## _ ## opt1; \ 407 } else if (c->srcBpc == 12) { \ 408 hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \ 409 ff_hscale12to19_ ## filtersize ## _ ## opt1; \ 410 } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \ 411 hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ 412 ff_hscale14to19_ ## filtersize ## _ ## opt1; \ 414 av_assert0(c->srcBpc == 16);\ 415 hscalefn = c->dstBpc <= 14 ? 
ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ 416 ff_hscale16to19_ ## filtersize ## _ ## opt1; \ 419 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ 420 switch (filtersize) { \ 421 case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ 422 case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ 423 default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ 425 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \ 427 case 16: do_16_case; break; \ 428 case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ 429 case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ 430 default: if (condition_8bit) break; \ 432 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ 434 case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ 435 case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ 436 case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ 437 case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ 438 default: av_assert0(c->dstBpc>8); \ 440 #define case_rgb(x, X, opt) \ 441 case AV_PIX_FMT_ ## X: \ 442 c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \ 443 if (!c->chrSrcHSubSample) \ 444 c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ 486 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ 487 switch (filtersize) { \ 488 case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ 489 case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ 490 default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \ 491 else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ #define EXTERNAL_MMX(flags)
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
int16_t ** alpPixBuf
Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
const uint64_t ff_dither8[2]
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
int chrSrcH
Height of source chroma planes.
#define VSCALE_FUNC(size, opt)
#define SCALE_FUNCS_MMX(opt)
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
#define DECLARE_ALIGNED(n, t, v)
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) output line.
int dstY
Last destination vertical line output from last slice.
#define case_rgb(x, X, opt)
Macro definitions for various function/variable attributes.
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
int srcH
Height of source luma/alpha planes.
#define VSCALE_FUNCS(opt1, opt2)
#define EXTERNAL_SSE4(flags)
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
int vChrFilterSize
Vertical filter size for chroma pixels.
int16_t ** lumPixBuf
Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
The mask is usually used to keep the same permissions. Filters should remove permissions on references they give to output whenever necessary; it can be done automatically by setting the rej_perms field on the output pad. Here are a few guidelines corresponding to common cases: [...] then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output only at least one per link; the additional frames can be left buffered in the filter.
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
enum AVPixelFormat dstFormat
Destination pixel format.
#define EXTERNAL_SSE2(flags)
#define VSCALEX_FUNCS(opt)
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
const uint64_t ff_dither4[2]
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V).
simple assert() macros that are a bit more flexible than ISO C assert().
int vChrBufSize
Number of vertical chroma lines allocated in the ring buffer.
static const uint8_t offset[127][2]
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
as above, but U and V bytes are swapped
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
yuv2planar1_fn yuv2plane1
int vLumBufSize
Number of vertical luma/alpha lines allocated in the ring buffer.
#define SCALE_FUNCS_SSE(opt)
int16_t ** chrUPixBuf
Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int dstW
Width of destination luma/alpha planes.
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
#define AV_PIX_FMT_BGR555
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
yuv2planarX_fn yuv2planeX
#define AV_CPU_FLAG_MMX
standard MMX
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
#define EXTERNAL_SSSE3(flags)
synthesis window for stochastic i
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define EXTERNAL_MMXEXT(flags)
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
#define VSCALEX_FUNC(size, opt)
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
#define CONFIG_SWSCALE_ALPHA
enum AVPixelFormat srcFormat
Source pixel format.
#define HAVE_ALIGNED_STACK
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define AV_PIX_FMT_RGB555
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
DECLARE_ASM_CONST(8, int, deringThreshold)
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
#define EXTERNAL_AVX(flags)