28 #if COMPILE_TEMPLATE_MMXEXT 29 #define MOVNTQ "movntq" 30 #define SFENCE "sfence" 33 #define SFENCE " # nop" 41 #define YUV2RGB_LOOP(depth) \ 42 h_size = (c->dstW + 7) & ~7; \ 43 if (h_size * depth > FFABS(dstStride[0])) \ 46 vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ 48 __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ 49 for (y = 0; y < srcSliceH; y++) { \ 50 uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ 51 const uint8_t *py = src[0] + y * srcStride[0]; \ 52 const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ 53 const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ 54 x86_reg index = -h_size / 2; \ 56 #define YUV2RGB_INITIAL_LOAD \ 58 "movq (%5, %0, 2), %%mm6\n\t" \ 59 "movd (%2, %0), %%mm0\n\t" \ 60 "movd (%3, %0), %%mm1\n\t" \ 81 "movq %%mm6, %%mm7\n\t" \ 82 "punpcklbw %%mm4, %%mm0\n\t" \ 83 "punpcklbw %%mm4, %%mm1\n\t" \ 84 "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \ 85 "psrlw $8, %%mm7\n\t" \ 86 "psllw $3, %%mm0\n\t" \ 87 "psllw $3, %%mm1\n\t" \ 88 "psllw $3, %%mm6\n\t" \ 89 "psllw $3, %%mm7\n\t" \ 90 "psubsw "U_OFFSET"(%4), %%mm0\n\t" \ 91 "psubsw "V_OFFSET"(%4), %%mm1\n\t" \ 92 "psubw "Y_OFFSET"(%4), %%mm6\n\t" \ 93 "psubw "Y_OFFSET"(%4), %%mm7\n\t" \ 96 "movq %%mm0, %%mm2\n\t" \ 97 "movq %%mm1, %%mm3\n\t" \ 98 "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \ 99 "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \ 100 "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \ 101 "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \ 102 "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \ 103 "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \ 104 "paddsw %%mm3, %%mm2\n\t" \ 109 "movq %%mm7, %%mm3\n\t" \ 110 "movq %%mm7, %%mm5\n\t" \ 111 "paddsw %%mm0, %%mm3\n\t" \ 112 "paddsw %%mm1, %%mm5\n\t" \ 113 "paddsw %%mm2, %%mm7\n\t" \ 114 "paddsw %%mm6, %%mm0\n\t" \ 115 "paddsw %%mm6, %%mm1\n\t" \ 116 "paddsw %%mm6, %%mm2\n\t" \ 118 #define RGB_PACK_INTERLEAVE \ 120 "packuswb %%mm1, %%mm0\n\t" \ 121 "packuswb %%mm5, %%mm3\n\t" \ 122 "packuswb %%mm2, %%mm2\n\t" \ 123 "movq %%mm0, %%mm1\n\n" \ 124 "packuswb %%mm7, %%mm7\n\t" \ 125 "punpcklbw %%mm3, %%mm0\n\t" \ 126 "punpckhbw %%mm3, %%mm1\n\t" \ 127 "punpcklbw %%mm7, %%mm2\n\t" \ 129 #define YUV2RGB_ENDLOOP(depth) \ 130 "movq 8 (%5, %0, 2), %%mm6\n\t" \ 131 "movd 4 (%3, %0), %%mm1\n\t" \ 132 "movd 4 (%2, %0), %%mm0\n\t" \ 133 "add $"AV_STRINGIFY(depth * 8)", %1\n\t" \ 137 #define YUV2RGB_OPERANDS \ 138 : "+r" (index), "+r" (image) \ 139 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ 145 #define YUV2RGB_OPERANDS_ALPHA \ 146 : "+r" (index), "+r" (image) \ 147 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ 148 "r" (py - 2*index), "r" (pa - 2*index) \ 153 #define YUV2RGB_ENDFUNC \ 154 __asm__ volatile (SFENCE"\n\t" \ 161 #define RGB_PACK16(gmask, is15) \ 162 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ 163 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ 164 "movq %%mm2, %%mm3\n\t" \ 165 "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \ 166 "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \ 167 "psrlw $3, %%mm0\n\t" \ 168 IF##is15("psrlw $1, %%mm1\n\t") \ 169 "pand "MANGLE(pb_e0)", %%mm2\n\t" \ 170 "pand "MANGLE(gmask)", %%mm3\n\t" \ 171 "por %%mm2, %%mm0\n\t" \ 172 "por %%mm3, %%mm1\n\t" \ 173 "movq %%mm0, %%mm2\n\t" \ 174 "punpcklbw %%mm1, %%mm0\n\t" \ 175 "punpckhbw %%mm1, %%mm2\n\t" \ 176 MOVNTQ " %%mm0, (%1)\n\t" \ 177 MOVNTQ " %%mm2, 8(%1)\n\t" \ 180 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ 181 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ 182 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ 184 #if !COMPILE_TEMPLATE_MMXEXT 187 int srcSliceY,
int srcSliceH,
190 int y, h_size, vshift;
215 int srcSliceY,
int srcSliceH,
218 int y, h_size, vshift;
242 #define RGB_PACK24(blue, red)\ 243 "packuswb %%mm3, %%mm0 \n" \ 244 "packuswb %%mm5, %%mm1 \n" \ 245 "packuswb %%mm7, %%mm2 \n" \ 246 "movq %%mm"red", %%mm3 \n"\ 247 "movq %%mm"blue", %%mm6 \n"\ 248 "psrlq $32, %%mm"red" \n" \ 249 "punpcklbw %%mm2, %%mm3 \n" \ 250 "punpcklbw %%mm"red", %%mm6 \n" \ 251 "movq %%mm3, %%mm5 \n"\ 252 "punpckhbw %%mm"blue", %%mm2 \n" \ 253 "punpcklwd %%mm6, %%mm3 \n" \ 254 "punpckhwd %%mm6, %%mm5 \n" \ 257 #if COMPILE_TEMPLATE_MMXEXT 264 #define RGB_PACK24_B\ 265 "pshufw $0xc6, %%mm2, %%mm1 \n"\ 266 "pshufw $0x84, %%mm3, %%mm6 \n"\ 267 "pshufw $0x38, %%mm5, %%mm7 \n"\ 268 "pand "MANGLE(mask1101)", %%mm6 \n" \ 269 "movq %%mm1, %%mm0 \n"\ 270 "pand "MANGLE(mask0110)", %%mm7 \n" \ 271 "movq %%mm1, %%mm2 \n"\ 272 "pand "MANGLE(mask0100)", %%mm1 \n" \ 273 "psrlq $48, %%mm3 \n" \ 274 "pand "MANGLE(mask0010)", %%mm0 \n" \ 275 "psllq $32, %%mm5 \n" \ 276 "pand "MANGLE(mask1001)", %%mm2 \n" \ 277 "por %%mm3, %%mm1 \n"\ 278 "por %%mm6, %%mm0 \n"\ 279 "por %%mm5, %%mm1 \n"\ 280 "por %%mm7, %%mm2 \n"\ 281 MOVNTQ" %%mm0, (%1) \n"\ 282 MOVNTQ" %%mm1, 8(%1) \n"\ 283 MOVNTQ" %%mm2, 16(%1) \n"\ 287 #define RGB_PACK24_B\ 288 "movd %%mm3, (%1) \n" \ 289 "movd %%mm2, 4(%1) \n" \ 290 "psrlq $32, %%mm3 \n"\ 291 "psrlq $16, %%mm2 \n"\ 292 "movd %%mm3, 6(%1) \n" \ 293 "movd %%mm2, 10(%1) \n" \ 294 "psrlq $16, %%mm2 \n"\ 295 "movd %%mm5, 12(%1) \n" \ 296 "movd %%mm2, 16(%1) \n" \ 297 "psrlq $32, %%mm5 \n"\ 298 "movd %%mm2, 20(%1) \n" \ 299 "movd %%mm5, 18(%1) \n" \ 305 int srcSliceY,
int srcSliceH,
308 int y, h_size, vshift;
323 int srcSliceY,
int srcSliceH,
326 int y, h_size, vshift;
340 #define SET_EMPTY_ALPHA \ 341 "pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" \ 344 "movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t" \ 346 #define RGB_PACK32(red, green, blue, alpha) \ 347 "movq %%mm"blue", %%mm5\n\t" \ 348 "movq %%mm"red", %%mm6\n\t" \ 349 "punpckhbw %%mm"green", %%mm5\n\t" \ 350 "punpcklbw %%mm"green", %%mm"blue"\n\t" \ 351 "punpckhbw %%mm"alpha", %%mm6\n\t" \ 352 "punpcklbw %%mm"alpha", %%mm"red"\n\t" \ 353 "movq %%mm"blue", %%mm"green"\n\t" \ 354 "movq %%mm5, %%mm"alpha"\n\t" \ 355 "punpcklwd %%mm"red", %%mm"blue"\n\t" \ 356 "punpckhwd %%mm"red", %%mm"green"\n\t" \ 357 "punpcklwd %%mm6, %%mm5\n\t" \ 358 "punpckhwd %%mm6, %%mm"alpha"\n\t" \ 359 MOVNTQ " %%mm"blue", 0(%1)\n\t" \ 360 MOVNTQ " %%mm"green", 8(%1)\n\t" \ 361 MOVNTQ " %%mm5, 16(%1)\n\t" \ 362 MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ 364 #if !COMPILE_TEMPLATE_MMXEXT 367 int srcSliceY,
int srcSliceH,
370 int y, h_size, vshift;
385 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA 388 int srcSliceY,
int srcSliceH,
391 int y, h_size, vshift;
395 const uint8_t *pa =
src[3] + y * srcStride[3];
410 int srcSliceY,
int srcSliceH,
413 int y, h_size, vshift;
428 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA 431 int srcSliceY,
int srcSliceH,
434 int y, h_size, vshift;
438 const uint8_t *pa =
src[3] + y * srcStride[3];
const uint64_t ff_dither8[2]
#define YUV2RGB_INITIAL_LOAD
static int RENAME() yuv420_rgb24(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
const uint64_t ff_dither4[2]
static int RENAME() yuv420_bgr24(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
static int RENAME() yuv420_rgb15(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define RGB_PACK32(red, green, blue, alpha)
static int RENAME() yuv420_rgb16(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define YUV2RGB_ENDLOOP(depth)
static int RENAME() yuv420_bgr32(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define YUV2RGB_LOOP(depth)
#define RGB_PACK16(gmask, is15)
static int RENAME() yuv420_rgb32(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define YUV2RGB_OPERANDS_ALPHA
DECLARE_ASM_CONST(8, int, deringThreshold)
#define RGB_PACK_INTERLEAVE
else dst[i][x+y *dst_stride[i]]
#define RGB_PACK24(blue, red)