/*
 * software YUV to RGB converter
 *
 * Copyright (C) 2001-2007 Michael Niedermayer
 *           (c) 2010 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Drop any earlier definitions of the helper macros before (re)defining them.
 * NOTE(review): presumably needed because this template is included more than
 * once with different COMPILE_TEMPLATE_* settings -- confirm with the includer. */
#undef MOVNTQ
#undef EMMS
#undef SFENCE

/* Instruction used for the 64-bit pixel stores, and the matching store fence.
 * With COMPILE_TEMPLATE_MMXEXT set the stores use "movntq" and the function
 * epilogue issues "sfence"; otherwise a plain "movq" is used and SFENCE
 * expands to an assembler comment only. */
#if COMPILE_TEMPLATE_MMXEXT
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif

/* MMX register numbers (as strings, to be pasted after "%%mm") that hold the
 * individual channels when the 24/32-bit packing macros are invoked. */
#define REG_BLUE  "0"
#define REG_RED   "1"
#define REG_GREEN "2"
#define REG_ALPHA "3"

/* Function prologue shared by all converters in this file.
 *
 * Expects the locals y, h_size and vshift plus the parameters c, src,
 * srcStride, srcSliceY, srcSliceH, dst and dstStride to be in scope.
 *
 *  - h_size is c->dstW rounded up to a multiple of 8 (pixels are processed in
 *    groups of 8); one group is dropped when h_size * depth would exceed
 *    |dstStride[0]|.  The callers pass depth = 2, 3 or 4 for the 15/16-, 24-
 *    and 32-bit outputs.
 *  - vshift is 0 when the source format is AV_PIX_FMT_YUV422P and 1
 *    otherwise, so chroma rows are taken from row (y >> vshift).
 *  - mm4 is cleared once and used as the zero register by YUV2RGB.
 *  - For every row the macro sets up the output pointer (image), the three
 *    plane pointers (py, pu, pv) and a negative counter (index) that the asm
 *    loop increments towards zero.
 *
 * The for-loop opened here is left open on purpose; it is closed by
 * YUV2RGB_OPERANDS or YUV2RGB_OPERANDS_ALPHA.
 *
 * NOTE(review): if h_size ends up as 0, index starts at 0 and the asm body
 * (which tests the counter only at its end) still runs once -- confirm that
 * callers never select these functions for such narrow outputs. */
#define YUV2RGB_LOOP(depth)                                          \
    h_size = (c->dstW + 7) & ~7;                                     \
    if (h_size * (depth) > FFABS(dstStride[0]))                      \
        h_size -= 8;                                                 \
                                                                     \
    vshift = c->srcFormat != AV_PIX_FMT_YUV422P;                     \
                                                                     \
    __asm__ volatile ("pxor %mm4, %mm4\n\t");                        \
    for (y = 0; y < srcSliceH; y++) {                                \
        uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
        const uint8_t *py = src[0] +               y * srcStride[0]; \
        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
        x86_reg index = -h_size / 2;

/* Opens the per-row asm statement and loads the first group of samples:
 * 8 luma bytes into mm6 and 4 bytes each of U and V into mm0 / mm1.
 * Operand numbers refer to the list supplied by YUV2RGB_OPERANDS[_ALPHA]
 * (%0 = index, %2 = U base, %3 = V base, %5 = Y base).
 * Label "1:" marks the top of the loop that YUV2RGB_ENDLOOP jumps back to.
 * The asm statement is intentionally left unterminated; the following macros
 * append further string literals to it. */
#define YUV2RGB_INITIAL_LOAD          \
    __asm__ volatile (                \
    "movq (%5, %0, 2), %%mm6\n\t"     \
    "movd (%2, %0), %%mm0\n\t"        \
    "movd (%3, %0), %%mm1\n\t"        \
    "1: \n\t"

/* YUV2RGB core
 * Conversion is performed in the usual way:
 * R = Y' * Ycoef + Vred * V'
 * G = Y' * Ycoef + Vgreen * V' + Ugreen * U'
 * B = Y' * Ycoef + Ublue * U'
 *
 * where X' = X * 8 - Xoffset (the multiplication is performed to increase
 * precision a bit).
 * Since it operates on horizontally subsampled chroma, the Y component is
 * additionally split into Y1 and Y2 for even and odd pixels; each U/V sample
 * is shared by one even and one odd pixel.
 *
 * Input:
 * mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register
 * %4  - base pointer for the *_OFFSET / *_COEFF displacements
 * Output (4 signed 16-bit words per register, not yet clamped):
 * even pixels: mm1 - R, mm2 - G, mm0 - B
 * odd pixels:  mm5 - R, mm7 - G, mm3 - B
 */
#define YUV2RGB \
    /* convert Y, U, V into Y1', Y2', U', V' */ \
    "movq %%mm6, %%mm7\n\t" \
    "punpcklbw %%mm4, %%mm0\n\t" \
    "punpcklbw %%mm4, %%mm1\n\t" \
    "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \
    "psrlw $8, %%mm7\n\t" \
    "psllw $3, %%mm0\n\t" \
    "psllw $3, %%mm1\n\t" \
    "psllw $3, %%mm6\n\t" \
    "psllw $3, %%mm7\n\t" \
    "psubsw "U_OFFSET"(%4), %%mm0\n\t" \
    "psubsw "V_OFFSET"(%4), %%mm1\n\t" \
    "psubw "Y_OFFSET"(%4), %%mm6\n\t" \
    "psubw "Y_OFFSET"(%4), %%mm7\n\t" \
\
    /* multiply by coefficients */ \
    "movq %%mm0, %%mm2\n\t" \
    "movq %%mm1, %%mm3\n\t" \
    "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \
    "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \
    "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \
    "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \
    "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \
    "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \
    "paddsw %%mm3, %%mm2\n\t" \
    /* now: mm0 = UB, mm1 = VR, mm2 = CG */ \
    /* mm6 = Y1, mm7 = Y2 */ \
\
    /* produce RGB: odd pixels first (they need copies of Y2), then even */ \
    "movq %%mm7, %%mm3\n\t" \
    "movq %%mm7, %%mm5\n\t" \
    "paddsw %%mm0, %%mm3\n\t" \
    "paddsw %%mm1, %%mm5\n\t" \
    "paddsw %%mm2, %%mm7\n\t" \
    "paddsw %%mm6, %%mm0\n\t" \
    "paddsw %%mm6, %%mm1\n\t" \
    "paddsw %%mm6, %%mm2\n\t"

/* Clamp the 16-bit results of YUV2RGB to unsigned bytes and merge the even-
 * and odd-pixel halves back into pixel order.
 *
 * Input:  even pixels in mm0 (B), mm1 (R), mm2 (G);
 *         odd pixels in  mm3 (B), mm5 (R), mm7 (G).
 * Output: 8 consecutive bytes per channel: mm0 = B0..B7, mm1 = R0..R7,
 *         mm2 = G0..G7.  mm3 and mm7 are overwritten. */
#define RGB_PACK_INTERLEAVE \
    /* pack and interleave even/odd pixels */ \
    "packuswb %%mm1, %%mm0\n\t" /* mm0 = Beven | Reven */ \
    "packuswb %%mm5, %%mm3\n\t" /* mm3 = Bodd  | Rodd  */ \
    "packuswb %%mm2, %%mm2\n\t" /* mm2 = Geven | Geven */ \
    "movq %%mm0, %%mm1\n\t" \
    "packuswb %%mm7, %%mm7\n\t" /* mm7 = Godd  | Godd  */ \
    "punpcklbw %%mm3, %%mm0\n\t" \
    "punpckhbw %%mm3, %%mm1\n\t" \
    "punpcklbw %%mm7, %%mm2\n\t"

/* Tail of the asm loop body.
 *
 * Loads the next group of samples (8 luma bytes into mm6, 4 V bytes into mm1,
 * 4 U bytes into mm0), advances the output pointer (%1) by depth * 8 bytes and
 * the counter (%0) by 4, then jumps back to label 1 while the counter is
 * still negative.
 *
 * The loads are issued before the loop test, so the last iteration of a row
 * also reads the group that follows the processed span.
 * depth is stringified as written (e.g. "2 * 8") and evaluated by the
 * assembler. */
#define YUV2RGB_ENDLOOP(depth)                    \
    "movq 8 (%5, %0, 2), %%mm6\n\t"               \
    "movd 4 (%3, %0), %%mm1\n\t"                  \
    "movd 4 (%2, %0), %%mm0\n\t"                  \
    "add $"AV_STRINGIFY(depth * 8)", %1\n\t"      \
    "add $4, %0\n\t"                              \
    "js 1b\n\t"

/* Operand list for the per-row asm statement (variant without alpha); it also
 * terminates the asm statement and closes the for-loop opened by
 * YUV2RGB_LOOP.
 *
 *   %0  index            (in/out)  negative counter, counts up to zero
 *   %1  image            (in/out)  output pointer
 *   %2  pu - index                 U row, pre-biased so (%2, %0) is its start
 *   %3  pv - index                 V row, pre-biased likewise
 *   %4  &c->redDither              base for the *_OFFSET / *_COEFF / *_DITHER
 *                                  displacements used by the asm
 *   %5  py - 2*index               Y row, pre-biased for the (%5, %0, 2) form
 *
 * Only "memory" is listed as a clobber. */
#define YUV2RGB_OPERANDS                                          \
        : "+r" (index), "+r" (image)                              \
        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
          "r" (py - 2*index)                                      \
        : "memory"                                                \
        );                                                        \
    }

/* Operand list for the per-row asm statement when an alpha plane is read; it
 * also terminates the asm statement and closes the for-loop opened by
 * YUV2RGB_LOOP.  Requires a local `pa` (alpha row pointer) in scope.
 *
 *   %0  index            (in/out)  negative counter, counts up to zero
 *   %1  image            (in/out)  output pointer
 *   %2  pu - index                 U row, pre-biased so (%2, %0) is its start
 *   %3  pv - index                 V row, pre-biased likewise
 *   %4  &c->redDither              base for the *_OFFSET / *_COEFF / *_DITHER
 *                                  displacements used by the asm
 *   %5  py - 2*index               Y row, pre-biased for the (%5, %0, 2) form
 *   %6  pa - 2*index               alpha row, addressed like the Y row
 *
 * Only "memory" is listed as a clobber. */
#define YUV2RGB_OPERANDS_ALPHA                                    \
        : "+r" (index), "+r" (image)                              \
        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
          "r" (py - 2*index), "r" (pa - 2*index)                  \
        : "memory"                                                \
        );                                                        \
    }

/* Function epilogue: issue SFENCE (a real "sfence" only in the MMXEXT build,
 * where the stores are "movntq"), leave MMX state with "emms", and return the
 * number of rows handed in (srcSliceH). */
#define YUV2RGB_ENDFUNC                   \
    __asm__ volatile (SFENCE"\n\t"        \
                      "emms    \n\t");    \
    return srcSliceH;

/* Compile-time conditionals selected by token pasting (IF##flag): IF0 drops
 * its argument, IF1 keeps it.  Used by RGB_PACK16 to emit one extra
 * instruction only in the 15-bit case. */
#define IF0(x)
#define IF1(x) x

/* Pack 8 pixels into 16 bits each and store them (16 bytes at (%1)).
 *
 * Input: mm0 = B0..B7, mm1 = R0..R7, mm2 = G0..G7 (bytes), as produced by
 *        RGB_PACK_INTERLEAVE.
 * gmask: name of the byte-mask constant applied to the green bits that end
 *        up in the high byte (callers pass pb_03 for 15-bit, pb_07 for
 *        16-bit).
 * is15:  literal 1 for the 15-bit layout, 0 for the 16-bit layout.  It
 *        selects the green shift counts (3-is15 / 5+is15, evaluated by the
 *        assembler) and, through IF##is15, whether red is shifted right by
 *        one extra bit.
 *
 * The low output bytes are assembled in mm0 (blue plus low green bits), the
 * high bytes in mm1 (red plus high green bits); both are then interleaved
 * and written with MOVNTQ. */
#define RGB_PACK16(gmask, is15)                        \
    "pand "MANGLE(mmx_redmask)", %%mm0\n\t"            \
    "pand "MANGLE(mmx_redmask)", %%mm1\n\t"            \
    "movq %%mm2, %%mm3\n\t"                            \
    "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t"         \
    "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t"         \
    "psrlw $3, %%mm0\n\t"                              \
    IF##is15("psrlw $1, %%mm1\n\t")                    \
    "pand "MANGLE(pb_e0)", %%mm2\n\t"                  \
    "pand "MANGLE(gmask)", %%mm3\n\t"                  \
    "por %%mm2, %%mm0\n\t"                             \
    "por %%mm3, %%mm1\n\t"                             \
    "movq %%mm0, %%mm2\n\t"                            \
    "punpcklbw %%mm1, %%mm0\n\t"                       \
    "punpckhbw %%mm1, %%mm2\n\t"                       \
    MOVNTQ " %%mm0, (%1)\n\t"                          \
    MOVNTQ " %%mm2, 8(%1)\n\t"

/* Add the per-row dither values to the packed channel bytes with unsigned
 * saturation: blue (mm0), green (mm2) and red (mm1).  The dither values are
 * read relative to %4 through the BLUE_DITHER / GREEN_DITHER / RED_DITHER
 * displacements, which are defined outside this file section. */
#define DITHER_RGB                               \
    "paddusb "BLUE_DITHER"(%4), %%mm0\n\t"       \
    "paddusb "GREEN_DITHER"(%4), %%mm2\n\t"      \
    "paddusb "RED_DITHER"(%4), %%mm1\n\t"

184 #if !COMPILE_TEMPLATE_MMXEXT
|
yading@11
|
185 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
186 int srcStride[],
|
yading@11
|
187 int srcSliceY, int srcSliceH,
|
yading@11
|
188 uint8_t *dst[], int dstStride[])
|
yading@11
|
189 {
|
yading@11
|
190 int y, h_size, vshift;
|
yading@11
|
191
|
yading@11
|
192 YUV2RGB_LOOP(2)
|
yading@11
|
193
|
yading@11
|
194 #ifdef DITHER1XBPP
|
yading@11
|
195 c->blueDither = ff_dither8[y & 1];
|
yading@11
|
196 c->greenDither = ff_dither8[y & 1];
|
yading@11
|
197 c->redDither = ff_dither8[(y + 1) & 1];
|
yading@11
|
198 #endif
|
yading@11
|
199
|
yading@11
|
200 YUV2RGB_INITIAL_LOAD
|
yading@11
|
201 YUV2RGB
|
yading@11
|
202 RGB_PACK_INTERLEAVE
|
yading@11
|
203 #ifdef DITHER1XBPP
|
yading@11
|
204 DITHER_RGB
|
yading@11
|
205 #endif
|
yading@11
|
206 RGB_PACK16(pb_03, 1)
|
yading@11
|
207
|
yading@11
|
208 YUV2RGB_ENDLOOP(2)
|
yading@11
|
209 YUV2RGB_OPERANDS
|
yading@11
|
210 YUV2RGB_ENDFUNC
|
yading@11
|
211 }
|
yading@11
|
212
|
yading@11
|
213 static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
214 int srcStride[],
|
yading@11
|
215 int srcSliceY, int srcSliceH,
|
yading@11
|
216 uint8_t *dst[], int dstStride[])
|
yading@11
|
217 {
|
yading@11
|
218 int y, h_size, vshift;
|
yading@11
|
219
|
yading@11
|
220 YUV2RGB_LOOP(2)
|
yading@11
|
221
|
yading@11
|
222 #ifdef DITHER1XBPP
|
yading@11
|
223 c->blueDither = ff_dither8[y & 1];
|
yading@11
|
224 c->greenDither = ff_dither4[y & 1];
|
yading@11
|
225 c->redDither = ff_dither8[(y + 1) & 1];
|
yading@11
|
226 #endif
|
yading@11
|
227
|
yading@11
|
228 YUV2RGB_INITIAL_LOAD
|
yading@11
|
229 YUV2RGB
|
yading@11
|
230 RGB_PACK_INTERLEAVE
|
yading@11
|
231 #ifdef DITHER1XBPP
|
yading@11
|
232 DITHER_RGB
|
yading@11
|
233 #endif
|
yading@11
|
234 RGB_PACK16(pb_07, 0)
|
yading@11
|
235
|
yading@11
|
236 YUV2RGB_ENDLOOP(2)
|
yading@11
|
237 YUV2RGB_OPERANDS
|
yading@11
|
238 YUV2RGB_ENDFUNC
|
yading@11
|
239 }
|
yading@11
|
240 #endif /* !COMPILE_TEMPLATE_MMXEXT */
|
yading@11
|
241
|
yading@11
|
/* First stage of 24-bit packing; takes the raw YUV2RGB output (even pixels in
 * mm0/mm1/mm2, odd pixels in mm3/mm5/mm7) and finishes by expanding
 * RGB_PACK24_B, which performs the actual stores.
 *
 * blue, red: register-number strings (REG_BLUE / REG_RED); swapping them at
 *            the call site swaps the channel order of the output.
 *
 * Registers handed to RGB_PACK24_B (letters as in the per-line comments):
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 *
 * NOTE(review): the channel letters in the per-line comments are schematic;
 * which physical channel "R" and "B" denote depends on the blue/red
 * arguments. */
#define RGB_PACK24(blue, red)\
    "packuswb  %%mm3,      %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
    "packuswb  %%mm5,      %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\
    "packuswb  %%mm7,      %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\
    "movq      %%mm"red",  %%mm3 \n"\
    "movq      %%mm"blue", %%mm6 \n"\
    "psrlq     $32,        %%mm"red" \n" /* R1 R3 R5 R7 */\
    "punpcklbw %%mm2,      %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\
    "punpcklbw %%mm"red",  %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\
    "movq      %%mm3,      %%mm5 \n"\
    "punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7 */\
    "punpcklwd %%mm6,      %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\
    "punpckhwd %%mm6,      %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
    RGB_PACK24_B

/* Second stage of 24-bit packing: write 24 bytes (8 pixels) to (%1).
 *
 * Input (from RGB_PACK24):
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 */
#if COMPILE_TEMPLATE_MMXEXT
/* Word-granular select masks (-1 keeps a 16-bit word, 0 clears it) used to
 * merge the shuffled registers below. */
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1};
DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0};
#undef RGB_PACK24_B
/* MMXEXT variant: rearrange 16-bit words with pshufw, mask and OR them into
 * three fully assembled quadwords, then issue three 8-byte MOVNTQ stores. */
#define RGB_PACK24_B\
    "pshufw    $0xc6,  %%mm2, %%mm1 \n"\
    "pshufw    $0x84,  %%mm3, %%mm6 \n"\
    "pshufw    $0x38,  %%mm5, %%mm7 \n"\
    "pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\
    "movq      %%mm1,         %%mm0 \n"\
    "pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\
    "movq      %%mm1,         %%mm2 \n"\
    "pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\
    "psrlq       $48,         %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\
    "pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\
    "psllq       $32,         %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\
    "pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\
    "por       %%mm3,         %%mm1 \n"\
    "por       %%mm6,         %%mm0 \n"\
    "por       %%mm5,         %%mm1 \n"\
    "por       %%mm7,         %%mm2 \n"\
    MOVNTQ"    %%mm0,          (%1) \n"\
    MOVNTQ"    %%mm1,         8(%1) \n"\
    MOVNTQ"    %%mm2,        16(%1) \n"

#else
#undef RGB_PACK24_B
/* Plain MMX variant: eight 4-byte stores.  The stores overlap on purpose --
 * each later store overwrites the surplus bytes written by an earlier one --
 * so their order must not be changed. */
#define RGB_PACK24_B\
    "movd      %%mm3,       (%1) \n" /* R0 G0 B0 R1 */\
    "movd      %%mm2,      4(%1) \n" /* G1 B1 */\
    "psrlq     $32,        %%mm3 \n"\
    "psrlq     $16,        %%mm2 \n"\
    "movd      %%mm3,      6(%1) \n" /* R2 G2 B2 R3 */\
    "movd      %%mm2,     10(%1) \n" /* G3 B3 */\
    "psrlq     $16,        %%mm2 \n"\
    "movd      %%mm5,     12(%1) \n" /* R4 G4 B4 R5 */\
    "movd      %%mm2,     16(%1) \n" /* G5 B5 */\
    "psrlq     $32,        %%mm5 \n"\
    "movd      %%mm2,     20(%1) \n" /* -- -- G7 B7 */\
    "movd      %%mm5,     18(%1) \n" /* R6 G6 B6 R7 */

#endif

303 static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
304 int srcStride[],
|
yading@11
|
305 int srcSliceY, int srcSliceH,
|
yading@11
|
306 uint8_t *dst[], int dstStride[])
|
yading@11
|
307 {
|
yading@11
|
308 int y, h_size, vshift;
|
yading@11
|
309
|
yading@11
|
310 YUV2RGB_LOOP(3)
|
yading@11
|
311
|
yading@11
|
312 YUV2RGB_INITIAL_LOAD
|
yading@11
|
313 YUV2RGB
|
yading@11
|
314 RGB_PACK24(REG_BLUE, REG_RED)
|
yading@11
|
315
|
yading@11
|
316 YUV2RGB_ENDLOOP(3)
|
yading@11
|
317 YUV2RGB_OPERANDS
|
yading@11
|
318 YUV2RGB_ENDFUNC
|
yading@11
|
319 }
|
yading@11
|
320
|
yading@11
|
321 static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
322 int srcStride[],
|
yading@11
|
323 int srcSliceY, int srcSliceH,
|
yading@11
|
324 uint8_t *dst[], int dstStride[])
|
yading@11
|
325 {
|
yading@11
|
326 int y, h_size, vshift;
|
yading@11
|
327
|
yading@11
|
328 YUV2RGB_LOOP(3)
|
yading@11
|
329
|
yading@11
|
330 YUV2RGB_INITIAL_LOAD
|
yading@11
|
331 YUV2RGB
|
yading@11
|
332 RGB_PACK24(REG_RED, REG_BLUE)
|
yading@11
|
333
|
yading@11
|
334 YUV2RGB_ENDLOOP(3)
|
yading@11
|
335 YUV2RGB_OPERANDS
|
yading@11
|
336 YUV2RGB_ENDFUNC
|
yading@11
|
337 }
|
yading@11
|
338
|
yading@11
|
339
|
yading@11
|
/* Fill the alpha register with all-one bits (comparing a register with itself
 * for equality sets every bit), i.e. alpha = 0xFF for all 8 pixels. */
#define SET_EMPTY_ALPHA                                                      \
    "pcmpeqd   %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */

/* Load 8 alpha bytes for the current pixel group into the alpha register.
 * Operand %6 is the pre-biased alpha row pointer supplied by
 * YUV2RGB_OPERANDS_ALPHA; it is indexed like the luma row (%0 scaled by 2). */
#define LOAD_ALPHA                                   \
    "movq      (%6, %0, 2),     %%mm"REG_ALPHA"\n\t"

/* Interleave four 8-byte channel registers into 8 four-byte pixels and store
 * them (32 bytes at (%1)) with MOVNTQ.
 *
 * red, green, blue, alpha: register-number strings naming the registers that
 * hold the respective channel bytes.  Within each output pixel the bytes
 * appear in memory in the order  blue, green, red, alpha  (as named by the
 * macro parameters), so passing REG_BLUE as `red` and REG_RED as `blue`
 * yields the opposite channel order.
 *
 * mm5 and mm6 are used as scratch; all four argument registers are
 * overwritten. */
#define RGB_PACK32(red, green, blue, alpha)  \
    "movq      %%mm"blue",  %%mm5\n\t"       \
    "movq      %%mm"red",   %%mm6\n\t"       \
    "punpckhbw %%mm"green", %%mm5\n\t"       \
    "punpcklbw %%mm"green", %%mm"blue"\n\t"  \
    "punpckhbw %%mm"alpha", %%mm6\n\t"       \
    "punpcklbw %%mm"alpha", %%mm"red"\n\t"   \
    "movq      %%mm"blue",  %%mm"green"\n\t" \
    "movq      %%mm5,       %%mm"alpha"\n\t" \
    "punpcklwd %%mm"red",   %%mm"blue"\n\t"  \
    "punpckhwd %%mm"red",   %%mm"green"\n\t" \
    "punpcklwd %%mm6,       %%mm5\n\t"       \
    "punpckhwd %%mm6,       %%mm"alpha"\n\t" \
    MOVNTQ "   %%mm"blue",   0(%1)\n\t"      \
    MOVNTQ "   %%mm"green",  8(%1)\n\t"      \
    MOVNTQ "   %%mm5,       16(%1)\n\t"      \
    MOVNTQ "   %%mm"alpha", 24(%1)\n\t"

364 #if !COMPILE_TEMPLATE_MMXEXT
|
yading@11
|
365 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
366 int srcStride[],
|
yading@11
|
367 int srcSliceY, int srcSliceH,
|
yading@11
|
368 uint8_t *dst[], int dstStride[])
|
yading@11
|
369 {
|
yading@11
|
370 int y, h_size, vshift;
|
yading@11
|
371
|
yading@11
|
372 YUV2RGB_LOOP(4)
|
yading@11
|
373
|
yading@11
|
374 YUV2RGB_INITIAL_LOAD
|
yading@11
|
375 YUV2RGB
|
yading@11
|
376 RGB_PACK_INTERLEAVE
|
yading@11
|
377 SET_EMPTY_ALPHA
|
yading@11
|
378 RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA)
|
yading@11
|
379
|
yading@11
|
380 YUV2RGB_ENDLOOP(4)
|
yading@11
|
381 YUV2RGB_OPERANDS
|
yading@11
|
382 YUV2RGB_ENDFUNC
|
yading@11
|
383 }
|
yading@11
|
384
|
yading@11
|
385 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
|
yading@11
|
386 static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
387 int srcStride[],
|
yading@11
|
388 int srcSliceY, int srcSliceH,
|
yading@11
|
389 uint8_t *dst[], int dstStride[])
|
yading@11
|
390 {
|
yading@11
|
391 int y, h_size, vshift;
|
yading@11
|
392
|
yading@11
|
393 YUV2RGB_LOOP(4)
|
yading@11
|
394
|
yading@11
|
395 const uint8_t *pa = src[3] + y * srcStride[3];
|
yading@11
|
396 YUV2RGB_INITIAL_LOAD
|
yading@11
|
397 YUV2RGB
|
yading@11
|
398 RGB_PACK_INTERLEAVE
|
yading@11
|
399 LOAD_ALPHA
|
yading@11
|
400 RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA)
|
yading@11
|
401
|
yading@11
|
402 YUV2RGB_ENDLOOP(4)
|
yading@11
|
403 YUV2RGB_OPERANDS_ALPHA
|
yading@11
|
404 YUV2RGB_ENDFUNC
|
yading@11
|
405 }
|
yading@11
|
406 #endif
|
yading@11
|
407
|
yading@11
|
408 static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
409 int srcStride[],
|
yading@11
|
410 int srcSliceY, int srcSliceH,
|
yading@11
|
411 uint8_t *dst[], int dstStride[])
|
yading@11
|
412 {
|
yading@11
|
413 int y, h_size, vshift;
|
yading@11
|
414
|
yading@11
|
415 YUV2RGB_LOOP(4)
|
yading@11
|
416
|
yading@11
|
417 YUV2RGB_INITIAL_LOAD
|
yading@11
|
418 YUV2RGB
|
yading@11
|
419 RGB_PACK_INTERLEAVE
|
yading@11
|
420 SET_EMPTY_ALPHA
|
yading@11
|
421 RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
|
yading@11
|
422
|
yading@11
|
423 YUV2RGB_ENDLOOP(4)
|
yading@11
|
424 YUV2RGB_OPERANDS
|
yading@11
|
425 YUV2RGB_ENDFUNC
|
yading@11
|
426 }
|
yading@11
|
427
|
yading@11
|
428 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
|
yading@11
|
429 static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
|
yading@11
|
430 int srcStride[],
|
yading@11
|
431 int srcSliceY, int srcSliceH,
|
yading@11
|
432 uint8_t *dst[], int dstStride[])
|
yading@11
|
433 {
|
yading@11
|
434 int y, h_size, vshift;
|
yading@11
|
435
|
yading@11
|
436 YUV2RGB_LOOP(4)
|
yading@11
|
437
|
yading@11
|
438 const uint8_t *pa = src[3] + y * srcStride[3];
|
yading@11
|
439 YUV2RGB_INITIAL_LOAD
|
yading@11
|
440 YUV2RGB
|
yading@11
|
441 RGB_PACK_INTERLEAVE
|
yading@11
|
442 LOAD_ALPHA
|
yading@11
|
443 RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
|
yading@11
|
444
|
yading@11
|
445 YUV2RGB_ENDLOOP(4)
|
yading@11
|
446 YUV2RGB_OPERANDS_ALPHA
|
yading@11
|
447 YUV2RGB_ENDFUNC
|
yading@11
|
448 }
|
yading@11
|
449 #endif
|
yading@11
|
450
|
yading@11
|
451 #endif /* !COMPILE_TEMPLATE_MMXEXT */
|