annotate ffmpeg/libswscale/x86/swscale_template.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents f445c3017523
children
rev   line source
yading@11 1 /*
yading@11 2 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
yading@11 3 *
yading@11 4 * This file is part of FFmpeg.
yading@11 5 *
yading@11 6 * FFmpeg is free software; you can redistribute it and/or
yading@11 7 * modify it under the terms of the GNU Lesser General Public
yading@11 8 * License as published by the Free Software Foundation; either
yading@11 9 * version 2.1 of the License, or (at your option) any later version.
yading@11 10 *
yading@11 11 * FFmpeg is distributed in the hope that it will be useful,
yading@11 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@11 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@11 14 * Lesser General Public License for more details.
yading@11 15 *
yading@11 16 * You should have received a copy of the GNU Lesser General Public
yading@11 17 * License along with FFmpeg; if not, write to the Free Software
yading@11 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@11 19 */
yading@11 20
/*
 * Store/prefetch mnemonics selected by COMPILE_TEMPLATE_MMXEXT.
 * The names are #undef'd first so they can be (re)defined below.
 * NOTE(review): presumably this template is included once per CPU variant,
 * which is why the macros must be reset -- confirm against the including file.
 */
yading@11 21 #undef REAL_MOVNTQ
yading@11 22 #undef MOVNTQ
yading@11 23 #undef MOVNTQ2
yading@11 24 #undef PREFETCH
yading@11 25
/*
 * PREFETCH expands to the "prefetchnta" mnemonic for MMXEXT builds. Otherwise
 * it expands to " # nop", which contains no instruction mnemonic
 * (NOTE(review): presumably '#' starts an assembler comment, so the line
 * assembles to nothing -- confirm for the target assembler).
 */
yading@11 26 #if COMPILE_TEMPLATE_MMXEXT
yading@11 27 #define PREFETCH "prefetchnta"
yading@11 28 #else
yading@11 29 #define PREFETCH " # nop"
yading@11 30 #endif
yading@11 31
/*
 * REAL_MOVNTQ(a,b) stringifies its two operands into one instruction line:
 * "movntq a, b" for MMXEXT builds, plain "movq a, b" otherwise.
 * MOVNTQ2 is the bare mnemonic, for call sites that spell out the operands
 * themselves.
 */
yading@11 32 #if COMPILE_TEMPLATE_MMXEXT
yading@11 33 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
yading@11 34 #define MOVNTQ2 "movntq "
yading@11 35 #else
yading@11 36 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
yading@11 37 #define MOVNTQ2 "movq "
yading@11 38 #endif
/* Extra macro level so that arguments are macro-expanded before REAL_MOVNTQ stringifies them. */
yading@11 39 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
yading@11 40
/*
 * Only compiled when COMPILE_TEMPLATE_MMXEXT is 0; the function name is not
 * wrapped in RENAME().
 * NOTE(review): presumably so that a single definition exists when the
 * template is instantiated for several CPU variants -- confirm.
 */
yading@11 41 #if !COMPILE_TEMPLATE_MMXEXT
/**
 * Load 8 dither bytes and widen them to 16-bit words in MMX registers.
 *
 * On exit (nothing is returned through C; the values stay in registers for
 * the asm statements that follow in the caller):
 *   mm0 = 0
 *   mm3 = bytes 0..3 of the (possibly rotated) input, zero-extended to words
 *   mm4 = bytes 4..7 of the (possibly rotated) input, zero-extended to words
 *
 * @param srcDither address of the 8 bytes that are read
 * @param rot       if non-zero, the 64-bit value is first rotated right by
 *                  24 bits (3 bytes) before widening
 *
 * NOTE(review): the asm statements declare no outputs or clobbers for
 * mm0/mm3/mm4; this relies on the compiler not touching MMX registers between
 * this call and the consuming asm -- confirm this is intended.
 */
yading@11 42 static av_always_inline void
yading@11 43 dither_8to16(const uint8_t *srcDither, int rot)
yading@11 44 {
yading@11 45 if (rot) {
yading@11 46 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
yading@11 47 "movq (%0), %%mm3\n\t"
yading@11 48 "movq %%mm3, %%mm4\n\t"
/* (x >> 24) | (x << 40): 64-bit rotate right by 24 bits */
yading@11 49 "psrlq $24, %%mm3\n\t"
yading@11 50 "psllq $40, %%mm4\n\t"
yading@11 51 "por %%mm4, %%mm3\n\t"
yading@11 52 "movq %%mm3, %%mm4\n\t"
/* interleave with zero (mm0): bytes -> words */
yading@11 53 "punpcklbw %%mm0, %%mm3\n\t"
yading@11 54 "punpckhbw %%mm0, %%mm4\n\t"
yading@11 55 :: "r"(srcDither)
yading@11 56 );
yading@11 57 } else {
yading@11 58 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
yading@11 59 "movq (%0), %%mm3\n\t"
yading@11 60 "movq %%mm3, %%mm4\n\t"
/* interleave with zero (mm0): bytes -> words */
yading@11 61 "punpcklbw %%mm0, %%mm3\n\t"
yading@11 62 "punpckhbw %%mm0, %%mm4\n\t"
yading@11 63 :: "r"(srcDither)
yading@11 64 );
yading@11 65 }
yading@11 66 }
yading@11 67 #endif
yading@11 68
/**
 * Vertically filter source lines into one line of 8-bit output, writing
 * 8 bytes per outer iteration.
 *
 * @param filter     read by the asm as a list of 16-byte entries: a source
 *                   line pointer at byte offset 0 and four 16-bit coefficients
 *                   at byte offset 8. The list ends at the first entry whose
 *                   pointer is NULL.
 * @param filterSize only used to bias the dither: 8 * (filterSize - 1) is
 *                   added to every dither word before it is shifted right by 4
 * @param src        not referenced in this body (line pointers are taken from
 *                   the filter list)
 * @param dest       output bytes; the store address is (dest - offset) + index
 *                   with index starting at offset, so the first store is at dest
 * @param dstW       the loop runs while index < dstW + offset (unsigned compare)
 * @param dither     8 dither bytes, widened by dither_8to16()
 * @param offset     initial index; also passed to dither_8to16() as its
 *                   rotate flag
 *
 * The three asm statements communicate through mm3/mm4 (and mm6/mm7), which
 * are not declared as operands or clobbers.
 * NOTE(review): the last asm writes through dest and reads through pointers
 * loaded from memory without a "memory" clobber -- confirm this is acceptable
 * for the compilers in use.
 */
yading@11 69 static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
yading@11 70 const int16_t **src, uint8_t *dest, int dstW,
yading@11 71 const uint8_t *dither, int offset)
yading@11 72 {
/* leaves the widened dither in mm3 (low 4) and mm4 (high 4) */
yading@11 73 dither_8to16(dither, offset);
yading@11 74 filterSize--;
/* broadcast filterSize to 4 words, multiply by 8, add to the dither, then >> 4 */
yading@11 75 __asm__ volatile(
yading@11 76 "movd %0, %%mm1\n\t"
yading@11 77 "punpcklwd %%mm1, %%mm1\n\t"
yading@11 78 "punpckldq %%mm1, %%mm1\n\t"
yading@11 79 "psllw $3, %%mm1\n\t"
yading@11 80 "paddw %%mm1, %%mm3\n\t"
yading@11 81 "paddw %%mm1, %%mm4\n\t"
yading@11 82 "psraw $4, %%mm3\n\t"
yading@11 83 "psraw $4, %%mm4\n\t"
yading@11 84 ::"m"(filterSize)
yading@11 85 );
yading@11 86
/*
 * mm6/mm7 keep the initial accumulator values so they can be restored for
 * each group of 8 pixels. REG_c is the index, REG_d walks the filter list,
 * REG_S holds the current source line pointer. Label 1 is the per-tap loop
 * ("jnz 1b" while the next line pointer is non-NULL); after the store, the
 * accumulators and list pointers are reset and "jb 1b" re-enters the same
 * label for the next 8 pixels.
 */
yading@11 87 __asm__ volatile(\
yading@11 88 "movq %%mm3, %%mm6\n\t"
yading@11 89 "movq %%mm4, %%mm7\n\t"
yading@11 90 "movl %3, %%ecx\n\t"
yading@11 91 "mov %0, %%"REG_d" \n\t"\
yading@11 92 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 93 ".p2align 4 \n\t" /* FIXME Unroll? */\
yading@11 94 "1: \n\t"\
yading@11 95 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
yading@11 96 "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\
yading@11 97 "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\
yading@11 98 "add $16, %%"REG_d" \n\t"\
yading@11 99 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 100 "test %%"REG_S", %%"REG_S" \n\t"\
yading@11 101 "pmulhw %%mm0, %%mm2 \n\t"\
yading@11 102 "pmulhw %%mm0, %%mm5 \n\t"\
yading@11 103 "paddw %%mm2, %%mm3 \n\t"\
yading@11 104 "paddw %%mm5, %%mm4 \n\t"\
yading@11 105 " jnz 1b \n\t"\
yading@11 106 "psraw $3, %%mm3 \n\t"\
yading@11 107 "psraw $3, %%mm4 \n\t"\
yading@11 108 "packuswb %%mm4, %%mm3 \n\t"
yading@11 109 MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
yading@11 110 "add $8, %%"REG_c" \n\t"\
yading@11 111 "cmp %2, %%"REG_c" \n\t"\
yading@11 112 "movq %%mm6, %%mm3\n\t"
yading@11 113 "movq %%mm7, %%mm4\n\t"
yading@11 114 "mov %0, %%"REG_d" \n\t"\
yading@11 115 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 116 "jb 1b \n\t"\
yading@11 117 :: "g" (filter),
yading@11 118 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
yading@11 119 : "%"REG_d, "%"REG_S, "%"REG_c
yading@11 120 );
yading@11 121 }
yading@11 122
/*
 * Opens an __asm__ volatile( statement (closed by the user, e.g. with
 * YSCALEYUV2PACKEDX_END) and emits the chroma vertical filter.
 *
 * REG_a is zeroed and used as the index; label 1 marks the start of the
 * per-group loop that the WRITE* macros jump back to. The filter list is read
 * from CHR_MMX_FILTER_OFFSET(%0) as 16-byte entries (line pointer at +0,
 * coefficients at +8), ending at a NULL pointer. V data is read from the U
 * pointer plus operand %6.
 *
 * Result: mm3 = U accumulator, mm4 = V accumulator, both started from the
 * value at VROUNDER_OFFSET(%0). Uses mm0, mm2, mm5, REG_d, REG_S as scratch.
 */
yading@11 123 #define YSCALEYUV2PACKEDX_UV \
yading@11 124 __asm__ volatile(\
yading@11 125 "xor %%"REG_a", %%"REG_a" \n\t"\
yading@11 126 ".p2align 4 \n\t"\
yading@11 127 "nop \n\t"\
yading@11 128 "1: \n\t"\
yading@11 129 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
yading@11 130 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 131 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
yading@11 132 "movq %%mm3, %%mm4 \n\t"\
yading@11 133 ".p2align 4 \n\t"\
yading@11 134 "2: \n\t"\
yading@11 135 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
yading@11 136 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
yading@11 137 "add %6, %%"REG_S" \n\t" \
yading@11 138 "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
yading@11 139 "add $16, %%"REG_d" \n\t"\
yading@11 140 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 141 "pmulhw %%mm0, %%mm2 \n\t"\
yading@11 142 "pmulhw %%mm0, %%mm5 \n\t"\
yading@11 143 "paddw %%mm2, %%mm3 \n\t"\
yading@11 144 "paddw %%mm5, %%mm4 \n\t"\
yading@11 145 "test %%"REG_S", %%"REG_S" \n\t"\
yading@11 146 " jnz 2b \n\t"\
yading@11 147
/*
 * Vertical filter for a full-resolution plane (used below with the luma and
 * alpha filter lists). Must appear inside an already open asm statement.
 *
 *   offset      displacement from %0 of the filter list (16-byte entries:
 *               line pointer at +0, coefficients at +8, NULL-terminated)
 *   coeff       scratch register receiving the coefficients
 *   src1, src2  scratch registers receiving the two 8-byte source loads at
 *               (line + 2*REG_a) and 8(line + 2*REG_a)
 *   dst1, dst2  accumulators, started from the value at VROUNDER_OFFSET(%0)
 *
 * Reuses local label 2; REG_d and REG_S are scratch.
 */
yading@11 148 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
yading@11 149 "lea "offset"(%0), %%"REG_d" \n\t"\
yading@11 150 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 151 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
yading@11 152 "movq "#dst1", "#dst2" \n\t"\
yading@11 153 ".p2align 4 \n\t"\
yading@11 154 "2: \n\t"\
yading@11 155 "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
yading@11 156 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
yading@11 157 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\
yading@11 158 "add $16, %%"REG_d" \n\t"\
yading@11 159 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 160 "pmulhw "#coeff", "#src1" \n\t"\
yading@11 161 "pmulhw "#coeff", "#src2" \n\t"\
yading@11 162 "paddw "#src1", "#dst1" \n\t"\
yading@11 163 "paddw "#src2", "#dst2" \n\t"\
yading@11 164 "test %%"REG_S", %%"REG_S" \n\t"\
yading@11 165 " jnz 2b \n\t"\
yading@11 166
/*
 * Chroma filter followed by the luma filter.
 * Result: mm3 = U, mm4 = V, mm1 = Y1 (first 4 luma words), mm7 = Y2 (next 4).
 */
yading@11 167 #define YSCALEYUV2PACKEDX \
yading@11 168 YSCALEYUV2PACKEDX_UV \
yading@11 169 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
yading@11 170
/*
 * Operand list, clobber list and closing ");" for an asm statement opened by
 * YSCALEYUV2PACKEDX_UV / YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 *   %0      = &c->redDither (base address for all *_OFFSET displacements)
 *   %1..%3  = dummy (the macros in this part of the file reference only
 *             %0, %4, %5 and %6, so these act as numbering placeholders)
 *   %4      = dest
 *   %5      = dstW_reg
 *   %6      = uv_off
 *
 * Expects c, dummy, dest, dstW_reg and uv_off to be in scope at the use site.
 */
yading@11 171 #define YSCALEYUV2PACKEDX_END \
yading@11 172 :: "r" (&c->redDither), \
yading@11 173 "m" (dummy), "m" (dummy), "m" (dummy),\
yading@11 174 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
yading@11 175 : "%"REG_a, "%"REG_d, "%"REG_S \
yading@11 176 );
yading@11 177
/*
 * Higher-precision counterpart of YSCALEYUV2PACKEDX_UV: opens the asm
 * statement, zeroes REG_a and defines label 1, then filters chroma using
 * pmaddwd with 32-bit accumulators.
 *
 * Each loop iteration consumes one filter entry holding two taps: line
 * pointers at +0 and +APCK_PTR2, their coefficients at +APCK_COEF; the next
 * entry starts at +APCK_SIZE and the list ends at a NULL pointer. Words of
 * the two lines are interleaved so that one pmaddwd computes a*c0 + b*c1.
 * V data is read from the U pointer plus operand %6.
 *
 * Accumulators: mm4/mm5 = U (low/high pairs), mm6/mm7 = V (low/high pairs).
 * After the loop they are shifted right by 16, packed to signed words, the
 * value at VROUNDER_OFFSET(%0) is added, and the results are stored to
 * U_TEMP(%0) and V_TEMP(%0).
 */
yading@11 178 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
yading@11 179 __asm__ volatile(\
yading@11 180 "xor %%"REG_a", %%"REG_a" \n\t"\
yading@11 181 ".p2align 4 \n\t"\
yading@11 182 "nop \n\t"\
yading@11 183 "1: \n\t"\
yading@11 184 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
yading@11 185 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 186 "pxor %%mm4, %%mm4 \n\t"\
yading@11 187 "pxor %%mm5, %%mm5 \n\t"\
yading@11 188 "pxor %%mm6, %%mm6 \n\t"\
yading@11 189 "pxor %%mm7, %%mm7 \n\t"\
yading@11 190 ".p2align 4 \n\t"\
yading@11 191 "2: \n\t"\
yading@11 192 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
yading@11 193 "add %6, %%"REG_S" \n\t" \
yading@11 194 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
yading@11 195 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
yading@11 196 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
yading@11 197 "movq %%mm0, %%mm3 \n\t"\
yading@11 198 "punpcklwd %%mm1, %%mm0 \n\t"\
yading@11 199 "punpckhwd %%mm1, %%mm3 \n\t"\
yading@11 200 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
yading@11 201 "pmaddwd %%mm1, %%mm0 \n\t"\
yading@11 202 "pmaddwd %%mm1, %%mm3 \n\t"\
yading@11 203 "paddd %%mm0, %%mm4 \n\t"\
yading@11 204 "paddd %%mm3, %%mm5 \n\t"\
yading@11 205 "add %6, %%"REG_S" \n\t" \
yading@11 206 "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
yading@11 207 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
yading@11 208 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
yading@11 209 "test %%"REG_S", %%"REG_S" \n\t"\
yading@11 210 "movq %%mm2, %%mm0 \n\t"\
yading@11 211 "punpcklwd %%mm3, %%mm2 \n\t"\
yading@11 212 "punpckhwd %%mm3, %%mm0 \n\t"\
yading@11 213 "pmaddwd %%mm1, %%mm2 \n\t"\
yading@11 214 "pmaddwd %%mm1, %%mm0 \n\t"\
yading@11 215 "paddd %%mm2, %%mm6 \n\t"\
yading@11 216 "paddd %%mm0, %%mm7 \n\t"\
yading@11 217 " jnz 2b \n\t"\
yading@11 218 "psrad $16, %%mm4 \n\t"\
yading@11 219 "psrad $16, %%mm5 \n\t"\
yading@11 220 "psrad $16, %%mm6 \n\t"\
yading@11 221 "psrad $16, %%mm7 \n\t"\
yading@11 222 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
yading@11 223 "packssdw %%mm5, %%mm4 \n\t"\
yading@11 224 "packssdw %%mm7, %%mm6 \n\t"\
yading@11 225 "paddw %%mm0, %%mm4 \n\t"\
yading@11 226 "paddw %%mm0, %%mm6 \n\t"\
yading@11 227 "movq %%mm4, "U_TEMP"(%0) \n\t"\
yading@11 228 "movq %%mm6, "V_TEMP"(%0) \n\t"\
yading@11 229
/*
 * Higher-precision vertical filter for a full-resolution plane (used below
 * with the luma and alpha filter lists); must appear inside an open asm
 * statement. `offset` is the displacement from %0 of the filter list, which
 * has the same two-taps-per-entry layout as in YSCALEYUV2PACKEDX_ACCURATE_UV
 * (APCK_PTR2 / APCK_COEF / APCK_SIZE).
 *
 * Accumulators: mm1/mm5 = first 4 samples (low/high pairs),
 *               mm7/mm6 = next 4 samples (low/high pairs).
 * On exit: mm1 = Y1 and mm7 = Y2 (shifted right by 16, packed, with the value
 * at VROUNDER_OFFSET(%0) added); mm3 and mm4 are reloaded from U_TEMP(%0) and
 * V_TEMP(%0). All of mm0..mm7 are overwritten.
 */
yading@11 230 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
yading@11 231 "lea "offset"(%0), %%"REG_d" \n\t"\
yading@11 232 "mov (%%"REG_d"), %%"REG_S" \n\t"\
yading@11 233 "pxor %%mm1, %%mm1 \n\t"\
yading@11 234 "pxor %%mm5, %%mm5 \n\t"\
yading@11 235 "pxor %%mm7, %%mm7 \n\t"\
yading@11 236 "pxor %%mm6, %%mm6 \n\t"\
yading@11 237 ".p2align 4 \n\t"\
yading@11 238 "2: \n\t"\
yading@11 239 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
yading@11 240 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
yading@11 241 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
yading@11 242 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
yading@11 243 "movq %%mm0, %%mm3 \n\t"\
yading@11 244 "punpcklwd %%mm4, %%mm0 \n\t"\
yading@11 245 "punpckhwd %%mm4, %%mm3 \n\t"\
yading@11 246 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
yading@11 247 "pmaddwd %%mm4, %%mm0 \n\t"\
yading@11 248 "pmaddwd %%mm4, %%mm3 \n\t"\
yading@11 249 "paddd %%mm0, %%mm1 \n\t"\
yading@11 250 "paddd %%mm3, %%mm5 \n\t"\
yading@11 251 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
yading@11 252 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
yading@11 253 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
yading@11 254 "test %%"REG_S", %%"REG_S" \n\t"\
yading@11 255 "movq %%mm2, %%mm0 \n\t"\
yading@11 256 "punpcklwd %%mm3, %%mm2 \n\t"\
yading@11 257 "punpckhwd %%mm3, %%mm0 \n\t"\
yading@11 258 "pmaddwd %%mm4, %%mm2 \n\t"\
yading@11 259 "pmaddwd %%mm4, %%mm0 \n\t"\
yading@11 260 "paddd %%mm2, %%mm7 \n\t"\
yading@11 261 "paddd %%mm0, %%mm6 \n\t"\
yading@11 262 " jnz 2b \n\t"\
yading@11 263 "psrad $16, %%mm1 \n\t"\
yading@11 264 "psrad $16, %%mm5 \n\t"\
yading@11 265 "psrad $16, %%mm7 \n\t"\
yading@11 266 "psrad $16, %%mm6 \n\t"\
yading@11 267 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
yading@11 268 "packssdw %%mm5, %%mm1 \n\t"\
yading@11 269 "packssdw %%mm6, %%mm7 \n\t"\
yading@11 270 "paddw %%mm0, %%mm1 \n\t"\
yading@11 271 "paddw %%mm0, %%mm7 \n\t"\
yading@11 272 "movq "U_TEMP"(%0), %%mm3 \n\t"\
yading@11 273 "movq "V_TEMP"(%0), %%mm4 \n\t"\
yading@11 274
/*
 * Higher-precision chroma filter followed by the higher-precision luma filter.
 * Result (same register layout as YSCALEYUV2PACKEDX):
 * mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2.
 */
yading@11 275 #define YSCALEYUV2PACKEDX_ACCURATE \
yading@11 276 YSCALEYUV2PACKEDX_ACCURATE_UV \
yading@11 277 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
yading@11 278
/*
 * YUV -> RGB conversion for 8 pixels, using the offsets and coefficients
 * stored relative to %0.
 *
 * In:  mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2 (16-bit words).
 * Out: mm2 = 8 blue bytes, mm4 = 8 green bytes, mm5 = 8 red bytes
 *      (unsigned-saturated by packuswb). mm0, mm1, mm3, mm6, mm7 are
 *      overwritten or left holding intermediates.
 *
 * Each chroma word is duplicated (punpck?wd of a register with itself)
 * before being added to luma, i.e. one chroma sample serves two pixels.
 */
yading@11 279 #define YSCALEYUV2RGBX \
yading@11 280 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
yading@11 281 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
yading@11 282 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
yading@11 283 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
yading@11 284 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
yading@11 285 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
yading@11 286 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
yading@11 287 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
yading@11 288 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
yading@11 289 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
yading@11 290 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
yading@11 291 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
yading@11 292 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
yading@11 293 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
yading@11 294 "paddw %%mm3, %%mm4 \n\t"\
yading@11 295 "movq %%mm2, %%mm0 \n\t"\
yading@11 296 "movq %%mm5, %%mm6 \n\t"\
yading@11 297 "movq %%mm4, %%mm3 \n\t"\
yading@11 298 "punpcklwd %%mm2, %%mm2 \n\t"\
yading@11 299 "punpcklwd %%mm5, %%mm5 \n\t"\
yading@11 300 "punpcklwd %%mm4, %%mm4 \n\t"\
yading@11 301 "paddw %%mm1, %%mm2 \n\t"\
yading@11 302 "paddw %%mm1, %%mm5 \n\t"\
yading@11 303 "paddw %%mm1, %%mm4 \n\t"\
yading@11 304 "punpckhwd %%mm0, %%mm0 \n\t"\
yading@11 305 "punpckhwd %%mm6, %%mm6 \n\t"\
yading@11 306 "punpckhwd %%mm3, %%mm3 \n\t"\
yading@11 307 "paddw %%mm7, %%mm0 \n\t"\
yading@11 308 "paddw %%mm7, %%mm6 \n\t"\
yading@11 309 "paddw %%mm7, %%mm3 \n\t"\
yading@11 310 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
yading@11 311 "packuswb %%mm0, %%mm2 \n\t"\
yading@11 312 "packuswb %%mm6, %%mm5 \n\t"\
yading@11 313 "packuswb %%mm3, %%mm4 \n\t"\
yading@11 314
/*
 * Interleave four registers of 8 bytes each (b, g, r, a) into eight 4-byte
 * pixels and store the 32 bytes at dst + 4*index. In memory each pixel is
 * laid out b, g, r, a (lowest address first). Then index += 8 and the code
 * jumps back to label 1 while index < dstw (unsigned compare).
 *
 *   dst, dstw, index   asm operands/registers for base, width and index
 *   b, g, r, a         input registers; b and r are modified
 *   q0, q2, q3, t      scratch registers
 */
yading@11 315 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
yading@11 316 "movq "#b", "#q2" \n\t" /* B */\
yading@11 317 "movq "#r", "#t" \n\t" /* R */\
yading@11 318 "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
yading@11 319 "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
yading@11 320 "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
yading@11 321 "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
yading@11 322 "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
yading@11 323 "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
yading@11 324 "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
yading@11 325 "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
yading@11 326 "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
yading@11 327 "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
yading@11 328 \
yading@11 329 MOVNTQ( q0, (dst, index, 4))\
yading@11 330 MOVNTQ( b, 8(dst, index, 4))\
yading@11 331 MOVNTQ( q2, 16(dst, index, 4))\
yading@11 332 MOVNTQ( q3, 24(dst, index, 4))\
yading@11 333 \
yading@11 334 "add $8, "#index" \n\t"\
yading@11 335 "cmp "#dstw", "#index" \n\t"\
yading@11 336 " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being stringified. */
yading@11 337 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
yading@11 338
/**
 * Vertical scale + YUV->32-bit packed output (b, g, r, a byte order in
 * memory), built on YSCALEYUV2PACKEDX_ACCURATE (32-bit pmaddwd accumulation).
 *
 * The filter lists, offsets and coefficients are all read by the asm relative
 * to &c->redDither; the lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/
 * chrVSrc/chrFilterSize/alpSrc/dstY parameters are not referenced directly
 * in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data, c->alpPixBuf selects the alpha path
 * @param dest output buffer, 4 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 339 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
yading@11 340 const int16_t **lumSrc, int lumFilterSize,
yading@11 341 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 342 const int16_t **chrVSrc,
yading@11 343 int chrFilterSize, const int16_t **alpSrc,
yading@11 344 uint8_t *dest, int dstW, int dstY)
yading@11 345 {
yading@11 346 x86_reg dummy=0;
yading@11 347 x86_reg dstW_reg = dstW;
yading@11 348 x86_reg uv_off = c->uv_offx2;
yading@11 349
yading@11 350 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
yading@11 351 YSCALEYUV2PACKEDX_ACCURATE
yading@11 352 YSCALEYUV2RGBX
/*
 * The alpha filter pass below overwrites mm0..mm7, so B, G, R are parked in
 * the U_TEMP/V_TEMP/Y_TEMP slots. The tail of ..._ACCURATE_YA reloads
 * U_TEMP -> mm3 (now B) and V_TEMP -> mm4 (now G); R is reloaded by hand.
 */
yading@11 353 "movq %%mm2, "U_TEMP"(%0) \n\t"
yading@11 354 "movq %%mm4, "V_TEMP"(%0) \n\t"
yading@11 355 "movq %%mm5, "Y_TEMP"(%0) \n\t"
yading@11 356 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
yading@11 357 "movq "Y_TEMP"(%0), %%mm5 \n\t"
/* alpha: >> 3 and pack the two halves into 8 unsigned bytes in mm1 */
yading@11 358 "psraw $3, %%mm1 \n\t"
yading@11 359 "psraw $3, %%mm7 \n\t"
yading@11 360 "packuswb %%mm7, %%mm1 \n\t"
yading@11 361 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
yading@11 362 YSCALEYUV2PACKEDX_END
yading@11 363 } else {
yading@11 364 YSCALEYUV2PACKEDX_ACCURATE
yading@11 365 YSCALEYUV2RGBX
/* no alpha plane: set every bit of mm7 so each alpha byte is 0xFF */
yading@11 366 "pcmpeqd %%mm7, %%mm7 \n\t"
yading@11 367 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 368 YSCALEYUV2PACKEDX_END
yading@11 369 }
yading@11 370 }
yading@11 371
/**
 * Vertical scale + YUV->32-bit packed output (b, g, r, a byte order in
 * memory), built on YSCALEYUV2PACKEDX (16-bit pmulhw accumulation).
 *
 * The filter lists, offsets and coefficients are all read by the asm relative
 * to &c->redDither; the lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/
 * chrVSrc/chrFilterSize/alpSrc/dstY parameters are not referenced directly
 * in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data, c->alpPixBuf selects the alpha path
 * @param dest output buffer, 4 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 372 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
yading@11 373 const int16_t **lumSrc, int lumFilterSize,
yading@11 374 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 375 const int16_t **chrVSrc,
yading@11 376 int chrFilterSize, const int16_t **alpSrc,
yading@11 377 uint8_t *dest, int dstW, int dstY)
yading@11 378 {
yading@11 379 x86_reg dummy=0;
yading@11 380 x86_reg dstW_reg = dstW;
yading@11 381 x86_reg uv_off = c->uv_offx2;
yading@11 382
yading@11 383 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
yading@11 384 YSCALEYUV2PACKEDX
yading@11 385 YSCALEYUV2RGBX
/* alpha filter pass uses mm0/mm3/mm6/mm1/mm7 only, leaving B, G, R in mm2/mm4/mm5 intact */
yading@11 386 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
/* alpha: >> 3 and pack the two halves into 8 unsigned bytes in mm1 */
yading@11 387 "psraw $3, %%mm1 \n\t"
yading@11 388 "psraw $3, %%mm7 \n\t"
yading@11 389 "packuswb %%mm7, %%mm1 \n\t"
yading@11 390 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
yading@11 391 YSCALEYUV2PACKEDX_END
yading@11 392 } else {
yading@11 393 YSCALEYUV2PACKEDX
yading@11 394 YSCALEYUV2RGBX
/* no alpha plane: set every bit of mm7 so each alpha byte is 0xFF */
yading@11 395 "pcmpeqd %%mm7, %%mm7 \n\t"
yading@11 396 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 397 YSCALEYUV2PACKEDX_END
yading@11 398 }
yading@11 399 }
yading@11 400
/*
 * Pack 8 pixels into 16-bit words with 5 bits from mm5 (R) in bits 15..11,
 * 6 bits from mm4 (G) in bits 10..5 and 5 bits from mm2 (B) in bits 4..0,
 * then store the 16 bytes at dst + 2*index. Afterwards index += 8 and the
 * code jumps back to label 1 while index < dstw (unsigned compare).
 *
 * In: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 * The bF8 / bFC masks clear the low bits first so the 64-bit shifts cannot
 * leak bits between neighbouring bytes. mm1 and mm3 are scratch.
 */
yading@11 401 #define REAL_WRITERGB16(dst, dstw, index) \
yading@11 402 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
yading@11 403 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
yading@11 404 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
yading@11 405 "psrlq $3, %%mm2 \n\t"\
yading@11 406 \
yading@11 407 "movq %%mm2, %%mm1 \n\t"\
yading@11 408 "movq %%mm4, %%mm3 \n\t"\
yading@11 409 \
yading@11 410 "punpcklbw %%mm7, %%mm3 \n\t"\
yading@11 411 "punpcklbw %%mm5, %%mm2 \n\t"\
yading@11 412 "punpckhbw %%mm7, %%mm4 \n\t"\
yading@11 413 "punpckhbw %%mm5, %%mm1 \n\t"\
yading@11 414 \
yading@11 415 "psllq $3, %%mm3 \n\t"\
yading@11 416 "psllq $3, %%mm4 \n\t"\
yading@11 417 \
yading@11 418 "por %%mm3, %%mm2 \n\t"\
yading@11 419 "por %%mm4, %%mm1 \n\t"\
yading@11 420 \
yading@11 421 MOVNTQ(%%mm2, (dst, index, 2))\
yading@11 422 MOVNTQ(%%mm1, 8(dst, index, 2))\
yading@11 423 \
yading@11 424 "add $8, "#index" \n\t"\
yading@11 425 "cmp "#dstw", "#index" \n\t"\
yading@11 426 " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being stringified. */
yading@11 427 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
yading@11 428
/**
 * Vertical scale + YUV->16-bit (5-6-5) packed output, built on
 * YSCALEYUV2PACKEDX_ACCURATE (32-bit pmaddwd accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 2 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 429 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
yading@11 430 const int16_t **lumSrc, int lumFilterSize,
yading@11 431 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 432 const int16_t **chrVSrc,
yading@11 433 int chrFilterSize, const int16_t **alpSrc,
yading@11 434 uint8_t *dest, int dstW, int dstY)
yading@11 435 {
yading@11 436 x86_reg dummy=0;
yading@11 437 x86_reg dstW_reg = dstW;
yading@11 438 x86_reg uv_off = c->uv_offx2;
yading@11 439
yading@11 440 YSCALEYUV2PACKEDX_ACCURATE
yading@11 441 YSCALEYUV2RGBX
yading@11 442 "pxor %%mm7, %%mm7 \n\t"
yading@11 443 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* optional dither: saturating byte add of the per-channel dither values stored relative to %0 */
yading@11 444 #ifdef DITHER1XBPP
yading@11 445 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
yading@11 446 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
yading@11 447 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
yading@11 448 #endif
yading@11 449 WRITERGB16(%4, %5, %%REGa)
yading@11 450 YSCALEYUV2PACKEDX_END
yading@11 451 }
yading@11 452
/**
 * Vertical scale + YUV->16-bit (5-6-5) packed output, built on
 * YSCALEYUV2PACKEDX (16-bit pmulhw accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 2 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 453 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
yading@11 454 const int16_t **lumSrc, int lumFilterSize,
yading@11 455 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 456 const int16_t **chrVSrc,
yading@11 457 int chrFilterSize, const int16_t **alpSrc,
yading@11 458 uint8_t *dest, int dstW, int dstY)
yading@11 459 {
yading@11 460 x86_reg dummy=0;
yading@11 461 x86_reg dstW_reg = dstW;
yading@11 462 x86_reg uv_off = c->uv_offx2;
yading@11 463
yading@11 464 YSCALEYUV2PACKEDX
yading@11 465 YSCALEYUV2RGBX
yading@11 466 "pxor %%mm7, %%mm7 \n\t"
yading@11 467 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* optional dither: saturating byte add of the per-channel dither values stored relative to %0 */
yading@11 468 #ifdef DITHER1XBPP
yading@11 469 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
yading@11 470 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
yading@11 471 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
yading@11 472 #endif
yading@11 473 WRITERGB16(%4, %5, %%REGa)
yading@11 474 YSCALEYUV2PACKEDX_END
yading@11 475 }
yading@11 476
/*
 * Pack 8 pixels into 16-bit words with 5 bits from mm5 (R) in bits 14..10,
 * 5 bits from mm4 (G) in bits 9..5 and 5 bits from mm2 (B) in bits 4..0
 * (bit 15 is 0), then store the 16 bytes at dst + 2*index. Afterwards
 * index += 8 and the code jumps back to label 1 while index < dstw
 * (unsigned compare).
 *
 * In: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 * The bF8 mask clears the low 3 bits first so the 64-bit shifts cannot leak
 * bits between neighbouring bytes. mm1 and mm3 are scratch.
 */
yading@11 477 #define REAL_WRITERGB15(dst, dstw, index) \
yading@11 478 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
yading@11 479 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
yading@11 480 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
yading@11 481 "psrlq $3, %%mm2 \n\t"\
yading@11 482 "psrlq $1, %%mm5 \n\t"\
yading@11 483 \
yading@11 484 "movq %%mm2, %%mm1 \n\t"\
yading@11 485 "movq %%mm4, %%mm3 \n\t"\
yading@11 486 \
yading@11 487 "punpcklbw %%mm7, %%mm3 \n\t"\
yading@11 488 "punpcklbw %%mm5, %%mm2 \n\t"\
yading@11 489 "punpckhbw %%mm7, %%mm4 \n\t"\
yading@11 490 "punpckhbw %%mm5, %%mm1 \n\t"\
yading@11 491 \
yading@11 492 "psllq $2, %%mm3 \n\t"\
yading@11 493 "psllq $2, %%mm4 \n\t"\
yading@11 494 \
yading@11 495 "por %%mm3, %%mm2 \n\t"\
yading@11 496 "por %%mm4, %%mm1 \n\t"\
yading@11 497 \
yading@11 498 MOVNTQ(%%mm2, (dst, index, 2))\
yading@11 499 MOVNTQ(%%mm1, 8(dst, index, 2))\
yading@11 500 \
yading@11 501 "add $8, "#index" \n\t"\
yading@11 502 "cmp "#dstw", "#index" \n\t"\
yading@11 503 " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being stringified. */
yading@11 504 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
yading@11 505
/**
 * Vertical scale + YUV->16-bit (5-5-5) packed output, built on
 * YSCALEYUV2PACKEDX_ACCURATE (32-bit pmaddwd accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 2 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 506 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
yading@11 507 const int16_t **lumSrc, int lumFilterSize,
yading@11 508 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 509 const int16_t **chrVSrc,
yading@11 510 int chrFilterSize, const int16_t **alpSrc,
yading@11 511 uint8_t *dest, int dstW, int dstY)
yading@11 512 {
yading@11 513 x86_reg dummy=0;
yading@11 514 x86_reg dstW_reg = dstW;
yading@11 515 x86_reg uv_off = c->uv_offx2;
yading@11 516
yading@11 517 YSCALEYUV2PACKEDX_ACCURATE
yading@11 518 YSCALEYUV2RGBX
yading@11 519 "pxor %%mm7, %%mm7 \n\t"
yading@11 520 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* optional dither: saturating byte add of the per-channel dither values stored relative to %0 */
yading@11 521 #ifdef DITHER1XBPP
yading@11 522 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
yading@11 523 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
yading@11 524 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
yading@11 525 #endif
yading@11 526 WRITERGB15(%4, %5, %%REGa)
yading@11 527 YSCALEYUV2PACKEDX_END
yading@11 528 }
yading@11 529
/**
 * Vertical scale + YUV->16-bit (5-5-5) packed output, built on
 * YSCALEYUV2PACKEDX (16-bit pmulhw accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 2 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 530 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
yading@11 531 const int16_t **lumSrc, int lumFilterSize,
yading@11 532 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 533 const int16_t **chrVSrc,
yading@11 534 int chrFilterSize, const int16_t **alpSrc,
yading@11 535 uint8_t *dest, int dstW, int dstY)
yading@11 536 {
yading@11 537 x86_reg dummy=0;
yading@11 538 x86_reg dstW_reg = dstW;
yading@11 539 x86_reg uv_off = c->uv_offx2;
yading@11 540
yading@11 541 YSCALEYUV2PACKEDX
yading@11 542 YSCALEYUV2RGBX
yading@11 543 "pxor %%mm7, %%mm7 \n\t"
yading@11 544 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* optional dither: saturating byte add of the per-channel dither values stored relative to %0 */
yading@11 545 #ifdef DITHER1XBPP
yading@11 546 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
yading@11 547 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
yading@11 548 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
yading@11 549 #endif
yading@11 550 WRITERGB15(%4, %5, %%REGa)
yading@11 551 YSCALEYUV2PACKEDX_END
yading@11 552 }
yading@11 553
/*
 * Plain-MMX 24-bit writer: turns 8 pixels (mm2 = B, mm4 = G, mm5 = R bytes,
 * mm7 = 0) into 24 packed bytes and stores them as three 8-byte writes at
 * dst, dst+8 and dst+16. Afterwards dst += 24, index += 8, and the code
 * jumps back to label 1 while index < dstw (unsigned compare).
 *
 * Unlike the 16/32-bit writers, dst is used as a running pointer rather than
 * base + scaled index, so it must name a writable register. All of mm0..mm7
 * are overwritten.
 */
yading@11 554 #define WRITEBGR24MMX(dst, dstw, index) \
yading@11 555 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
yading@11 556 "movq %%mm2, %%mm1 \n\t" /* B */\
yading@11 557 "movq %%mm5, %%mm6 \n\t" /* R */\
yading@11 558 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
yading@11 559 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
yading@11 560 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
yading@11 561 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
yading@11 562 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
yading@11 563 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
yading@11 564 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
yading@11 565 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
yading@11 566 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
yading@11 567 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
yading@11 568 \
yading@11 569 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
yading@11 570 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
yading@11 571 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
yading@11 572 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
yading@11 573 \
yading@11 574 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
yading@11 575 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
yading@11 576 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
yading@11 577 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
yading@11 578 \
yading@11 579 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
yading@11 580 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
yading@11 581 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
yading@11 582 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
yading@11 583 \
yading@11 584 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
yading@11 585 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
yading@11 586 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
yading@11 587 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
yading@11 588 MOVNTQ(%%mm0, (dst))\
yading@11 589 \
yading@11 590 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
yading@11 591 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
yading@11 592 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
yading@11 593 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
yading@11 594 MOVNTQ(%%mm6, 8(dst))\
yading@11 595 \
yading@11 596 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
yading@11 597 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
yading@11 598 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
yading@11 599 MOVNTQ(%%mm5, 16(dst))\
yading@11 600 \
yading@11 601 "add $24, "#dst" \n\t"\
yading@11 602 \
yading@11 603 "add $8, "#index" \n\t"\
yading@11 604 "cmp "#dstw", "#index" \n\t"\
yading@11 605 " jb 1b \n\t"
yading@11 606
/*
 * MMXEXT 24-bit writer: same contract as WRITEBGR24MMX (8 pixels from
 * mm2 = B, mm4 = G, mm5 = R into 24 bytes at dst, then dst += 24, index += 8,
 * loop to label 1 while index < dstw), but built from pshufw word shuffles
 * and the ff_M24A / ff_M24B / ff_M24C byte masks.
 *
 * mm0 and mm7 are loaded with masks on entry, so no input value is required
 * in mm7 here. mm1, mm3, mm6 are scratch and mm4 is shifted in place.
 */
yading@11 607 #define WRITEBGR24MMXEXT(dst, dstw, index) \
yading@11 608 /* mm2=B, %%mm4=G, %%mm5=R (mm7 is overwritten with a mask below) */\
yading@11 609 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
yading@11 610 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
yading@11 611 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
yading@11 612 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
yading@11 613 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
yading@11 614 \
yading@11 615 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
yading@11 616 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
yading@11 617 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
yading@11 618 \
yading@11 619 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
yading@11 620 "por %%mm1, %%mm6 \n\t"\
yading@11 621 "por %%mm3, %%mm6 \n\t"\
yading@11 622 MOVNTQ(%%mm6, (dst))\
yading@11 623 \
yading@11 624 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
yading@11 625 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
yading@11 626 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
yading@11 627 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
yading@11 628 \
yading@11 629 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
yading@11 630 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
yading@11 631 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
yading@11 632 \
yading@11 633 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
yading@11 634 "por %%mm3, %%mm6 \n\t"\
yading@11 635 MOVNTQ(%%mm6, 8(dst))\
yading@11 636 \
yading@11 637 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
yading@11 638 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
yading@11 639 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
yading@11 640 \
yading@11 641 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
yading@11 642 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
yading@11 643 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
yading@11 644 \
yading@11 645 "por %%mm1, %%mm3 \n\t"\
yading@11 646 "por %%mm3, %%mm6 \n\t"\
yading@11 647 MOVNTQ(%%mm6, 16(dst))\
yading@11 648 \
yading@11 649 "add $24, "#dst" \n\t"\
yading@11 650 \
yading@11 651 "add $8, "#index" \n\t"\
yading@11 652 "cmp "#dstw", "#index" \n\t"\
yading@11 653 " jb 1b \n\t"
yading@11 654
/* WRITEBGR24 resolves to the pshufw-based writer for MMXEXT builds and to the plain-MMX writer otherwise. */
yading@11 655 #if COMPILE_TEMPLATE_MMXEXT
yading@11 656 #undef WRITEBGR24
yading@11 657 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
yading@11 658 #else
yading@11 659 #undef WRITEBGR24
yading@11 660 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
yading@11 661 #endif
yading@11 662
/**
 * Vertical scale + YUV->24-bit packed output, built on
 * YSCALEYUV2PACKEDX_ACCURATE (32-bit pmaddwd accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * The operand list is written out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is additionally clobbered.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 3 bytes per pixel, written 8 pixels (24 bytes)
 *             at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 663 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
yading@11 664 const int16_t **lumSrc, int lumFilterSize,
yading@11 665 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 666 const int16_t **chrVSrc,
yading@11 667 int chrFilterSize, const int16_t **alpSrc,
yading@11 668 uint8_t *dest, int dstW, int dstY)
yading@11 669 {
yading@11 670 x86_reg dummy=0;
yading@11 671 x86_reg dstW_reg = dstW;
yading@11 672 x86_reg uv_off = c->uv_offx2;
yading@11 673
yading@11 674 YSCALEYUV2PACKEDX_ACCURATE
yading@11 675 YSCALEYUV2RGBX
yading@11 676 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3 * index; recomputed on every pass through label 1 */
yading@11 677 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
yading@11 678 "add %4, %%"REG_c" \n\t"
yading@11 679 WRITEBGR24(%%REGc, %5, %%REGa)
yading@11 680 :: "r" (&c->redDither),
yading@11 681 "m" (dummy), "m" (dummy), "m" (dummy),
yading@11 682 "r" (dest), "m" (dstW_reg), "m"(uv_off)
yading@11 683 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
yading@11 684 );
yading@11 685 }
yading@11 686
/**
 * Vertical scale + YUV->24-bit packed output, built on YSCALEYUV2PACKEDX
 * (16-bit pmulhw accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * The operand list is written out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is additionally clobbered.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 3 bytes per pixel, written 8 pixels (24 bytes)
 *             at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 687 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
yading@11 688 const int16_t **lumSrc, int lumFilterSize,
yading@11 689 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 690 const int16_t **chrVSrc,
yading@11 691 int chrFilterSize, const int16_t **alpSrc,
yading@11 692 uint8_t *dest, int dstW, int dstY)
yading@11 693 {
yading@11 694 x86_reg dummy=0;
yading@11 695 x86_reg dstW_reg = dstW;
yading@11 696 x86_reg uv_off = c->uv_offx2;
yading@11 697
yading@11 698 YSCALEYUV2PACKEDX
yading@11 699 YSCALEYUV2RGBX
yading@11 700 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3 * index; recomputed on every pass through label 1 */
yading@11 701 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
yading@11 702 "add %4, %%"REG_c" \n\t"
yading@11 703 WRITEBGR24(%%REGc, %5, %%REGa)
yading@11 704 :: "r" (&c->redDither),
yading@11 705 "m" (dummy), "m" (dummy), "m" (dummy),
yading@11 706 "r" (dest), "m" (dstW_reg), "m"(uv_off)
yading@11 707 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
yading@11 708 );
yading@11 709 }
yading@11 710
/*
 * Pack 8 luma and 4+4 chroma words into 16 bytes ordered Y U Y V ... and
 * store them at dst + 2*index. Afterwards index += 8 and the code jumps back
 * to label 1 while index < dstw (unsigned compare).
 *
 * In: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2 as 16-bit words already scaled to
 * the 8-bit range; packuswb saturates them to unsigned bytes.
 * mm1, mm3, mm4, mm7 are overwritten.
 */
yading@11 711 #define REAL_WRITEYUY2(dst, dstw, index) \
yading@11 712 "packuswb %%mm3, %%mm3 \n\t"\
yading@11 713 "packuswb %%mm4, %%mm4 \n\t"\
yading@11 714 "packuswb %%mm7, %%mm1 \n\t"\
yading@11 715 "punpcklbw %%mm4, %%mm3 \n\t"\
yading@11 716 "movq %%mm1, %%mm7 \n\t"\
yading@11 717 "punpcklbw %%mm3, %%mm1 \n\t"\
yading@11 718 "punpckhbw %%mm3, %%mm7 \n\t"\
yading@11 719 \
yading@11 720 MOVNTQ(%%mm1, (dst, index, 2))\
yading@11 721 MOVNTQ(%%mm7, 8(dst, index, 2))\
yading@11 722 \
yading@11 723 "add $8, "#index" \n\t"\
yading@11 724 "cmp "#dstw", "#index" \n\t"\
yading@11 725 " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being stringified. */
yading@11 726 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
yading@11 727
/**
 * Vertical scale to packed Y U Y V output (no colour conversion), built on
 * YSCALEYUV2PACKEDX_ACCURATE (32-bit pmaddwd accumulation).
 *
 * All filter data is read by the asm relative to &c->redDither; the
 * lumFilter/lumSrc/lumFilterSize/chrFilter/chrUSrc/chrVSrc/chrFilterSize/
 * alpSrc/dstY parameters are not referenced directly in this body.
 *
 * @param c    scaler context; c->uv_offx2 is the byte distance added to the
 *             U pointer to reach V data
 * @param dest output buffer, 2 bytes per pixel, written 8 pixels at a time
 * @param dstW loop bound for the pixel index (unsigned compare)
 */
yading@11 728 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
yading@11 729 const int16_t **lumSrc, int lumFilterSize,
yading@11 730 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 731 const int16_t **chrVSrc,
yading@11 732 int chrFilterSize, const int16_t **alpSrc,
yading@11 733 uint8_t *dest, int dstW, int dstY)
yading@11 734 {
yading@11 735 x86_reg dummy=0;
yading@11 736 x86_reg dstW_reg = dstW;
yading@11 737 x86_reg uv_off = c->uv_offx2;
yading@11 738
yading@11 739 YSCALEYUV2PACKEDX_ACCURATE
yading@11 740 /* mm3=U, mm4=V, mm1=Y1, mm7=Y2; shifted right by 3 below before being packed to bytes */
yading@11 741 "psraw $3, %%mm3 \n\t"
yading@11 742 "psraw $3, %%mm4 \n\t"
yading@11 743 "psraw $3, %%mm1 \n\t"
yading@11 744 "psraw $3, %%mm7 \n\t"
yading@11 745 WRITEYUY2(%4, %5, %%REGa)
yading@11 746 YSCALEYUV2PACKEDX_END
yading@11 747 }
yading@11 748
/**
 * yuyv422 writer of the "_X" family, built on YSCALEYUV2PACKEDX.
 *
 * The asm statement is opened and closed by YSCALEYUV2PACKEDX and
 * YSCALEYUV2PACKEDX_END, both defined outside this excerpt. The locals
 * dummy, dstW_reg and uv_off are not referenced in the visible text;
 * presumably YSCALEYUV2PACKEDX_END lists them as asm operands (WRITEYUY2 is
 * given %4 as destination and %5 as width) -- TODO confirm.
 */
yading@11 749 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
yading@11 750 const int16_t **lumSrc, int lumFilterSize,
yading@11 751 const int16_t *chrFilter, const int16_t **chrUSrc,
yading@11 752 const int16_t **chrVSrc,
yading@11 753 int chrFilterSize, const int16_t **alpSrc,
yading@11 754 uint8_t *dest, int dstW, int dstY)
yading@11 755 {
yading@11 756 x86_reg dummy=0;
yading@11 757 x86_reg dstW_reg = dstW;
yading@11 758 x86_reg uv_off = c->uv_offx2;
yading@11 759
yading@11 760 YSCALEYUV2PACKEDX
yading@11 761 /* arithmetic >>3 of the four word vectors that WRITEYUY2 packs: mm3/mm4 (chroma) and mm1/mm7 (luma) */
yading@11 762 "psraw $3, %%mm3 \n\t"
yading@11 763 "psraw $3, %%mm4 \n\t"
yading@11 764 "psraw $3, %%mm1 \n\t"
yading@11 765 "psraw $3, %%mm7 \n\t"
yading@11 766 WRITEYUY2(%4, %5, %%REGa)
yading@11 767 YSCALEYUV2PACKEDX_END
yading@11 768 }
yading@11 769
/*
 * Loop head and chroma half of the two-line ("bilinear") YUV->RGB path.
 *
 * Zeroes index and opens local label 1; the matching "jb 1b" is emitted by
 * the WRITE* macro placed after the conversion (see REAL_WRITEYUY2 for the
 * pattern). Asm operands %2 and %3 are the two chroma lines; each is read at
 * index and again at index + UV_OFF_BYTE(c) for the second chroma plane.
 * Each pair is blended as ((line0 - line1) * coeff >> 16) + (line1 >> 4),
 * with coeff read from CHR_MMX_FILTER_OFFSET+8(c). U_OFFSET / V_OFFSET are
 * then subtracted and the green contributions computed.
 *
 * In the inline comments "eax" stands for index and "+2048" for the
 * UV_OFF_BYTE(c) offset.
 */
yading@11 770 #define REAL_YSCALEYUV2RGB_UV(index, c) \
yading@11 771 "xor "#index", "#index" \n\t"\
yading@11 772 ".p2align 4 \n\t"\
yading@11 773 "1: \n\t"\
yading@11 774 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
yading@11 775 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
yading@11 776 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 777 "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
yading@11 778 "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
yading@11 779 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 780 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
yading@11 781 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
yading@11 782 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
yading@11 783 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
yading@11 784 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
yading@11 785 "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\
yading@11 786 "psraw $4, %%mm4 \n\t" /* uvbuf1[eax+2048] >>4*/\
yading@11 787 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
yading@11 788 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
yading@11 789 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
yading@11 790 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
yading@11 791 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
yading@11 792 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
yading@11 793 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
yading@11 794 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
yading@11 795 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
yading@11 796
/*
 * Blend two 16-bit sample lines b1 and b2 for 8 samples starting at
 * element index (byte address b + 2*index): result =
 * ((b1 - b2) * coeff >> 16) + (b2 >> 4), with coeff read from
 * LUM_MMX_FILTER_OFFSET+8(c). The first four results end up in mm1, the next
 * four in mm7; mm0 and mm6 are used as scratch.
 * Used for luma by YSCALEYUV2RGB and reused for the alpha lines by
 * yuv2rgb32_2.
 */
yading@11 797 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
yading@11 798 "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
yading@11 799 "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
yading@11 800 "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
yading@11 801 "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
yading@11 802 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
yading@11 803 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
yading@11 804 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
yading@11 805 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
yading@11 806 "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\
yading@11 807 "psraw $4, %%mm7 \n\t" /* buf1[eax] >>4*/\
yading@11 808 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
yading@11 809 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
yading@11 810
/*
 * Colour-matrix step shared by the two-line RGB path.
 *
 * Expects mm2=(U-128)8, mm5=(V-128)8, mm3=ug, mm4=vg (as left by
 * REAL_YSCALEYUV2RGB_UV) and luma words in mm1/mm7. Multiplies chroma by
 * UB_COEFF / VR_COEFF, subtracts Y_OFFSET from luma and scales it by Y_COEFF,
 * duplicates each chroma term for two neighbouring pixels (punpck[lh]wd),
 * adds luma, and saturates to bytes.
 * Result: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes; mm0, mm3 and
 * mm6 are overwritten.
 */
yading@11 811 #define REAL_YSCALEYUV2RGB_COEFF(c) \
yading@11 812 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
yading@11 813 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
yading@11 814 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
yading@11 815 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
yading@11 816 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
yading@11 817 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
yading@11 818 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
yading@11 819 "paddw %%mm3, %%mm4 \n\t"\
yading@11 820 "movq %%mm2, %%mm0 \n\t"\
yading@11 821 "movq %%mm5, %%mm6 \n\t"\
yading@11 822 "movq %%mm4, %%mm3 \n\t"\
yading@11 823 "punpcklwd %%mm2, %%mm2 \n\t"\
yading@11 824 "punpcklwd %%mm5, %%mm5 \n\t"\
yading@11 825 "punpcklwd %%mm4, %%mm4 \n\t"\
yading@11 826 "paddw %%mm1, %%mm2 \n\t"\
yading@11 827 "paddw %%mm1, %%mm5 \n\t"\
yading@11 828 "paddw %%mm1, %%mm4 \n\t"\
yading@11 829 "punpckhwd %%mm0, %%mm0 \n\t"\
yading@11 830 "punpckhwd %%mm6, %%mm6 \n\t"\
yading@11 831 "punpckhwd %%mm3, %%mm3 \n\t"\
yading@11 832 "paddw %%mm7, %%mm0 \n\t"\
yading@11 833 "paddw %%mm7, %%mm6 \n\t"\
yading@11 834 "paddw %%mm7, %%mm3 \n\t"\
yading@11 835 /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: summed with Y1/mm1, 2: summed with Y2/mm7) */\
yading@11 836 "packuswb %%mm0, %%mm2 \n\t"\
yading@11 837 "packuswb %%mm6, %%mm5 \n\t"\
yading@11 838 "packuswb %%mm3, %%mm4 \n\t"\
yading@11 839
/* Expansion layer for REAL_YSCALEYUV2RGB_YA: arguments are macro-expanded here before being stringified with '#'. */
yading@11 840 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
yading@11 841
/*
 * Complete two-line YUV->RGB conversion for 8 pixels: chroma blend (which
 * also opens loop label 1), luma blend of asm operands %0 and %1, then the
 * colour matrix. Leaves B/G/R bytes in mm2/mm4/mm5.
 */
yading@11 842 #define YSCALEYUV2RGB(index, c) \
yading@11 843 REAL_YSCALEYUV2RGB_UV(index, c) \
yading@11 844 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
yading@11 845 REAL_YSCALEYUV2RGB_COEFF(c)
yading@11 846
yading@11 847 /**
yading@11 848 * vertical bilinear scale YV12 to RGB
 *
 * Blends two luma lines (buf[0], buf[1]) and two chroma lines (ubuf[0],
 * ubuf[1]) with YSCALEYUV2RGB and writes the pixels with WRITEBGR32
 * (defined outside this excerpt).
 *
 * @param c     context; &c->redDither is passed as asm operand %5 and every
 *              context field used by the asm is addressed relative to it
 * @param buf   luma lines, operands %0 and %1
 * @param ubuf  chroma lines, operands %2 and %3
 * @param abuf  alpha lines, read only if CONFIG_SWSCALE_ALPHA && c->alpPixBuf
 * @param dest  output line, operand %4
 *
 * vbuf, dstW, yalpha, uvalpha and y are not referenced in this body: the
 * loop bound is read from 8280(%5) (DSTW_OFFSET according to the note in the
 * sibling *_2 functions) and the blend weights from the context at
 * LUM/CHR_MMX_FILTER_OFFSET+8; presumably the caller stores them there
 * beforehand -- TODO confirm.
yading@11 849 */
yading@11 850 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
yading@11 851 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 852 const int16_t *abuf[2], uint8_t *dest,
yading@11 853 int dstW, int yalpha, int uvalpha, int y)
yading@11 854 {
yading@11 855 const int16_t *buf0 = buf[0], *buf1 = buf[1],
yading@11 856 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
yading@11 857
yading@11 858 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
yading@11 859 const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
yading@11 860 #if ARCH_X86_64
/* x86-64: r8 is the loop index; dest (%4) and both alpha lines (%6, %7) are passed in registers */
yading@11 861 __asm__ volatile(
yading@11 862 YSCALEYUV2RGB(%%r8, %5)
yading@11 863 YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
yading@11 864 "psraw $3, %%mm1 \n\t" /* blended alpha >>3 (on top of the >>4 scale of YSCALEYUV2RGB_YA) */
yading@11 865 "psraw $3, %%mm7 \n\t" /* blended alpha >>3 (on top of the >>4 scale of YSCALEYUV2RGB_YA) */
yading@11 866 "packuswb %%mm7, %%mm1 \n\t"
yading@11 867 WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
yading@11 868 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
yading@11 869 "a" (&c->redDither),
yading@11 870 "r" (abuf0), "r" (abuf1)
yading@11 871 : "%r8"
yading@11 872 );
yading@11 873 #else
/* other targets: the alpha line pointers are parked in the context and swapped into %0/%1 around the alpha blend */
yading@11 874 c->u_temp=(intptr_t)abuf0;
yading@11 875 c->v_temp=(intptr_t)abuf1;
yading@11 876 __asm__ volatile(
/* save REG_b in the context, load dest into it, and use REG_BP as loop index */
yading@11 877 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 878 "mov %4, %%"REG_b" \n\t"
yading@11 879 "push %%"REG_BP" \n\t"
yading@11 880 YSCALEYUV2RGB(%%REGBP, %5)
yading@11 881 "push %0 \n\t"
yading@11 882 "push %1 \n\t"
yading@11 883 "mov "U_TEMP"(%5), %0 \n\t"
yading@11 884 "mov "V_TEMP"(%5), %1 \n\t"
yading@11 885 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
yading@11 886 "psraw $3, %%mm1 \n\t" /* blended alpha >>3 (on top of the >>4 scale of YSCALEYUV2RGB_YA) */
yading@11 887 "psraw $3, %%mm7 \n\t" /* blended alpha >>3 (on top of the >>4 scale of YSCALEYUV2RGB_YA) */
yading@11 888 "packuswb %%mm7, %%mm1 \n\t"
yading@11 889 "pop %1 \n\t"
yading@11 890 "pop %0 \n\t"
yading@11 891 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
yading@11 892 "pop %%"REG_BP" \n\t"
yading@11 893 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 894 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 895 "a" (&c->redDither)
yading@11 896 );
yading@11 897 #endif
yading@11 898 } else {
/* no alpha plane: pcmpeqd sets every bit of mm7, which is passed to WRITEBGR32 in the slot the alpha branches fill with packed alpha */
yading@11 899 __asm__ volatile(
yading@11 900 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 901 "mov %4, %%"REG_b" \n\t"
yading@11 902 "push %%"REG_BP" \n\t"
yading@11 903 YSCALEYUV2RGB(%%REGBP, %5)
yading@11 904 "pcmpeqd %%mm7, %%mm7 \n\t"
yading@11 905 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 906 "pop %%"REG_BP" \n\t"
yading@11 907 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 908 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 909 "a" (&c->redDither)
yading@11 910 );
yading@11 911 }
yading@11 912 }
yading@11 913
/**
 * Two-line blend to packed 24-bit output: YSCALEYUV2RGB followed by
 * WRITEBGR24 (defined outside this excerpt).
 *
 * Asm operands: %0/%1 = luma lines buf[0]/buf[1], %2/%3 = chroma lines
 * ubuf[0]/ubuf[1], %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in this body;
 * the width is read from the context (8280(%5)) and, presumably, the blend
 * weights were stored in the context by the caller -- TODO confirm.
 */
yading@11 914 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
yading@11 915 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 916 const int16_t *abuf[2], uint8_t *dest,
yading@11 917 int dstW, int yalpha, int uvalpha, int y)
yading@11 918 {
yading@11 919 const int16_t *buf0 = buf[0], *buf1 = buf[1],
yading@11 920 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
yading@11 921
yading@11 922 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
yading@11 923 __asm__ volatile(
yading@11 924 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 925 "mov %4, %%"REG_b" \n\t"
yading@11 926 "push %%"REG_BP" \n\t"
yading@11 927 YSCALEYUV2RGB(%%REGBP, %5)
/* mm7 = 0 */
yading@11 928 "pxor %%mm7, %%mm7 \n\t"
yading@11 929 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
yading@11 930 "pop %%"REG_BP" \n\t"
yading@11 931 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 932 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 933 "a" (&c->redDither)
yading@11 934 );
yading@11 935 }
yading@11 936
/**
 * Two-line blend to 15-bit output: YSCALEYUV2RGB followed by WRITERGB15
 * (defined outside this excerpt). When DITHER1XBPP is defined, the
 * BLUE/GREEN/RED_DITHER values from the context are added to the B/G/R bytes
 * with unsigned saturation before packing.
 *
 * Asm operands: %0/%1 = luma lines buf[0]/buf[1], %2/%3 = chroma lines
 * ubuf[0]/ubuf[1], %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in this body;
 * the width is read from the context (8280(%5)) and, presumably, the blend
 * weights were stored in the context by the caller -- TODO confirm.
 */
yading@11 937 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
yading@11 938 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 939 const int16_t *abuf[2], uint8_t *dest,
yading@11 940 int dstW, int yalpha, int uvalpha, int y)
yading@11 941 {
yading@11 942 const int16_t *buf0 = buf[0], *buf1 = buf[1],
yading@11 943 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
yading@11 944
yading@11 945 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
yading@11 946 __asm__ volatile(
yading@11 947 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 948 "mov %4, %%"REG_b" \n\t"
yading@11 949 "push %%"REG_BP" \n\t"
yading@11 950 YSCALEYUV2RGB(%%REGBP, %5)
yading@11 951 "pxor %%mm7, %%mm7 \n\t"
yading@11 952 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
yading@11 953 #ifdef DITHER1XBPP
yading@11 954 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
yading@11 955 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
yading@11 956 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
yading@11 957 #endif
yading@11 958 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
yading@11 959 "pop %%"REG_BP" \n\t"
yading@11 960 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 961 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 962 "a" (&c->redDither)
yading@11 963 );
yading@11 964 }
yading@11 965
/**
 * Two-line blend to 16-bit output: YSCALEYUV2RGB followed by WRITERGB16
 * (defined outside this excerpt). When DITHER1XBPP is defined, the
 * BLUE/GREEN/RED_DITHER values from the context are added to the B/G/R bytes
 * with unsigned saturation before packing.
 *
 * Asm operands: %0/%1 = luma lines buf[0]/buf[1], %2/%3 = chroma lines
 * ubuf[0]/ubuf[1], %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in this body;
 * the width is read from the context (8280(%5)) and, presumably, the blend
 * weights were stored in the context by the caller -- TODO confirm.
 */
yading@11 966 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
yading@11 967 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 968 const int16_t *abuf[2], uint8_t *dest,
yading@11 969 int dstW, int yalpha, int uvalpha, int y)
yading@11 970 {
yading@11 971 const int16_t *buf0 = buf[0], *buf1 = buf[1],
yading@11 972 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
yading@11 973
yading@11 974 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
yading@11 975 __asm__ volatile(
yading@11 976 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 977 "mov %4, %%"REG_b" \n\t"
yading@11 978 "push %%"REG_BP" \n\t"
yading@11 979 YSCALEYUV2RGB(%%REGBP, %5)
yading@11 980 "pxor %%mm7, %%mm7 \n\t"
yading@11 981 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
yading@11 982 #ifdef DITHER1XBPP
yading@11 983 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
yading@11 984 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
yading@11 985 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
yading@11 986 #endif
yading@11 987 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
yading@11 988 "pop %%"REG_BP" \n\t"
yading@11 989 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 990 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 991 "a" (&c->redDither)
yading@11 992 );
yading@11 993 }
yading@11 994
/*
 * Two-line blend for packed YUV output (no colour conversion).
 *
 * NOTE: before the loop this macro rewrites the coefficients stored at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) in place, shifted
 * right by 3, so the context memory is modified by every expansion.
 *
 * Loop body (label 1): chroma from asm operands %2/%3 (at index and at
 * index + UV_OFF_BYTE(c)) and luma from %0/%1 are blended as
 * ((line0 - line1) * coeff >> 16) + (line1 >> 7).
 * Result: mm3 / mm4 = the two chroma word vectors, mm1 / mm7 = luma words,
 * which is the register layout REAL_WRITEYUY2 consumes.
 *
 * In the inline comments "eax" stands for index and "+2048" for the
 * UV_OFF_BYTE(c) offset.
 */
yading@11 995 #define REAL_YSCALEYUV2PACKED(index, c) \
yading@11 996 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
yading@11 997 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
yading@11 998 "psraw $3, %%mm0 \n\t"\
yading@11 999 "psraw $3, %%mm1 \n\t"\
yading@11 1000 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
yading@11 1001 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
yading@11 1002 "xor "#index", "#index" \n\t"\
yading@11 1003 ".p2align 4 \n\t"\
yading@11 1004 "1: \n\t"\
yading@11 1005 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
yading@11 1006 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
yading@11 1007 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1008 "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
yading@11 1009 "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
yading@11 1010 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1011 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
yading@11 1012 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
yading@11 1013 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
yading@11 1014 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
yading@11 1015 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
yading@11 1016 "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
yading@11 1017 "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
yading@11 1018 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
yading@11 1019 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
yading@11 1020 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
yading@11 1021 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
yading@11 1022 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
yading@11 1023 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
yading@11 1024 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
yading@11 1025 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
yading@11 1026 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
yading@11 1027 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
yading@11 1028 "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
yading@11 1029 "psraw $7, %%mm7 \n\t" /* buf1[eax] >>7*/\
yading@11 1030 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
yading@11 1031 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
yading@11 1032
yading@11 1033 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
yading@11 1034
/**
 * Two-line blend to packed YUY2: YSCALEYUV2PACKED followed by WRITEYUY2.
 *
 * Asm operands: %0/%1 = luma lines buf[0]/buf[1], %2/%3 = chroma lines
 * ubuf[0]/ubuf[1], %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * Side effect: YSCALEYUV2PACKED shifts the coefficients stored at
 * LUM/CHR_MMX_FILTER_OFFSET+8 in the context right by 3, in place.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in this body;
 * the width is read from the context (8280(%5)).
 */
yading@11 1035 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
yading@11 1036 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 1037 const int16_t *abuf[2], uint8_t *dest,
yading@11 1038 int dstW, int yalpha, int uvalpha, int y)
yading@11 1039 {
yading@11 1040 const int16_t *buf0 = buf[0], *buf1 = buf[1],
yading@11 1041 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
yading@11 1042
yading@11 1043 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
yading@11 1044 __asm__ volatile(
yading@11 1045 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1046 "mov %4, %%"REG_b" \n\t"
yading@11 1047 "push %%"REG_BP" \n\t"
yading@11 1048 YSCALEYUV2PACKED(%%REGBP, %5)
yading@11 1049 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
yading@11 1050 "pop %%"REG_BP" \n\t"
yading@11 1051 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1052 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1053 "a" (&c->redDither)
yading@11 1054 );
yading@11 1055 }
yading@11 1056
/*
 * Single-line YUV->RGB conversion for 8 pixels (no vertical blending).
 *
 * Zeroes index and opens local label 1. Reads chroma only from asm operand
 * %2 (at index and at index + UV_OFF_BYTE(c)) and luma only from operand %0;
 * operands %1 and %3 are not touched. Samples are shifted right by 4, the
 * U/V/Y offsets subtracted, and the same colour matrix as in
 * REAL_YSCALEYUV2RGB_COEFF applied.
 * Result: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes.
 *
 * In the inline comments "eax" stands for index and "+2048" for the
 * UV_OFF_BYTE(c) offset.
 */
yading@11 1057 #define REAL_YSCALEYUV2RGB1(index, c) \
yading@11 1058 "xor "#index", "#index" \n\t"\
yading@11 1059 ".p2align 4 \n\t"\
yading@11 1060 "1: \n\t"\
yading@11 1061 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
yading@11 1062 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1063 "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
yading@11 1064 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1065 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4*/\
yading@11 1066 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4*/\
yading@11 1067 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
yading@11 1068 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
yading@11 1069 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
yading@11 1070 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
yading@11 1071 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
yading@11 1072 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
yading@11 1073 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
yading@11 1074 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
yading@11 1075 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
yading@11 1076 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
yading@11 1077 "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4*/\
yading@11 1078 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
yading@11 1079 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
yading@11 1080 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
yading@11 1081 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
yading@11 1082 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
yading@11 1083 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
yading@11 1084 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
yading@11 1085 "paddw %%mm3, %%mm4 \n\t"\
yading@11 1086 "movq %%mm2, %%mm0 \n\t"\
yading@11 1087 "movq %%mm5, %%mm6 \n\t"\
yading@11 1088 "movq %%mm4, %%mm3 \n\t"\
yading@11 1089 "punpcklwd %%mm2, %%mm2 \n\t"\
yading@11 1090 "punpcklwd %%mm5, %%mm5 \n\t"\
yading@11 1091 "punpcklwd %%mm4, %%mm4 \n\t"\
yading@11 1092 "paddw %%mm1, %%mm2 \n\t"\
yading@11 1093 "paddw %%mm1, %%mm5 \n\t"\
yading@11 1094 "paddw %%mm1, %%mm4 \n\t"\
yading@11 1095 "punpckhwd %%mm0, %%mm0 \n\t"\
yading@11 1096 "punpckhwd %%mm6, %%mm6 \n\t"\
yading@11 1097 "punpckhwd %%mm3, %%mm3 \n\t"\
yading@11 1098 "paddw %%mm7, %%mm0 \n\t"\
yading@11 1099 "paddw %%mm7, %%mm6 \n\t"\
yading@11 1100 "paddw %%mm7, %%mm3 \n\t"\
yading@11 1101 /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: summed with Y1/mm1, 2: summed with Y2/mm7) */\
yading@11 1102 "packuswb %%mm0, %%mm2 \n\t"\
yading@11 1103 "packuswb %%mm6, %%mm5 \n\t"\
yading@11 1104 "packuswb %%mm3, %%mm4 \n\t"\
yading@11 1105
yading@11 1106 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
yading@11 1107
yading@11 1108 // do vertical chrominance interpolation
/*
 * Same as REAL_YSCALEYUV2RGB1 except for chroma: the two chroma lines in asm
 * operands %2 and %3 are added and the sum is shifted right by 5 with a
 * logical shift (psrlw), i.e. their average at the >>4 scale used by the
 * colour matrix. Luma is still taken from operand %0 only; operand %1 is not
 * touched.
 * Result: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes.
 *
 * In the inline comments "eax" stands for index and "+2048" for the
 * UV_OFF_BYTE(c) offset.
 */
yading@11 1109 #define REAL_YSCALEYUV2RGB1b(index, c) \
yading@11 1110 "xor "#index", "#index" \n\t"\
yading@11 1111 ".p2align 4 \n\t"\
yading@11 1112 "1: \n\t"\
yading@11 1113 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
yading@11 1114 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
yading@11 1115 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1116 "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
yading@11 1117 "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
yading@11 1118 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1119 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
yading@11 1120 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
yading@11 1121 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
yading@11 1122 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
yading@11 1123 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
yading@11 1124 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
yading@11 1125 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
yading@11 1126 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
yading@11 1127 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
yading@11 1128 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
yading@11 1129 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
yading@11 1130 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
yading@11 1131 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
yading@11 1132 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
yading@11 1133 "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4*/\
yading@11 1134 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
yading@11 1135 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
yading@11 1136 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
yading@11 1137 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
yading@11 1138 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
yading@11 1139 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
yading@11 1140 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
yading@11 1141 "paddw %%mm3, %%mm4 \n\t"\
yading@11 1142 "movq %%mm2, %%mm0 \n\t"\
yading@11 1143 "movq %%mm5, %%mm6 \n\t"\
yading@11 1144 "movq %%mm4, %%mm3 \n\t"\
yading@11 1145 "punpcklwd %%mm2, %%mm2 \n\t"\
yading@11 1146 "punpcklwd %%mm5, %%mm5 \n\t"\
yading@11 1147 "punpcklwd %%mm4, %%mm4 \n\t"\
yading@11 1148 "paddw %%mm1, %%mm2 \n\t"\
yading@11 1149 "paddw %%mm1, %%mm5 \n\t"\
yading@11 1150 "paddw %%mm1, %%mm4 \n\t"\
yading@11 1151 "punpckhwd %%mm0, %%mm0 \n\t"\
yading@11 1152 "punpckhwd %%mm6, %%mm6 \n\t"\
yading@11 1153 "punpckhwd %%mm3, %%mm3 \n\t"\
yading@11 1154 "paddw %%mm7, %%mm0 \n\t"\
yading@11 1155 "paddw %%mm7, %%mm6 \n\t"\
yading@11 1156 "paddw %%mm7, %%mm3 \n\t"\
yading@11 1157 /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: summed with Y1/mm1, 2: summed with Y2/mm7) */\
yading@11 1158 "packuswb %%mm0, %%mm2 \n\t"\
yading@11 1159 "packuswb %%mm6, %%mm5 \n\t"\
yading@11 1160 "packuswb %%mm3, %%mm4 \n\t"\
yading@11 1161
yading@11 1162 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
yading@11 1163
/*
 * Load 8 alpha samples from the line in asm operand %1 (byte address
 * %1 + 2*index), shift each right by 7 and saturate to unsigned bytes.
 * Result: mm7 = 8 alpha bytes; mm1 is overwritten.
 * Callers must therefore bind the alpha line, not a luma line, to operand 1.
 */
yading@11 1164 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
yading@11 1165 "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
yading@11 1166 "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
yading@11 1167 "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
yading@11 1168 "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
yading@11 1169 "packuswb %%mm1, %%mm7 \n\t"
yading@11 1170 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
yading@11 1171
yading@11 1172 /**
yading@11 1173 * YV12 to RGB without scaling or interpolating
 *
 * Converts a single luma line and writes the pixels with WRITEBGR32 (defined
 * outside this excerpt). Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 * If CONFIG_SWSCALE_ALPHA && c->alpPixBuf, abuf0 is bound to asm operand %1
 * and converted by YSCALEYUV2RGB1_ALPHA; otherwise pcmpeqd sets every bit of
 * mm7, which takes the alpha slot in the WRITEBGR32 call.
 *
 * Asm operands: %0 = buf0, %1 = abuf0 or buf1 (an alias of buf0),
 * %2/%3 = chroma lines, %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, dstW and y are not referenced in this body; the width is read from
 * the context (8280(%5)).
yading@11 1174 */
yading@11 1175 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
yading@11 1176 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 1177 const int16_t *abuf0, uint8_t *dest,
yading@11 1178 int dstW, int uvalpha, int y)
yading@11 1179 {
yading@11 1180 const int16_t *ubuf0 = ubuf[0];
yading@11 1181 const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
yading@11 1182
yading@11 1183 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
yading@11 1184 const int16_t *ubuf1 = ubuf[0];
yading@11 1185 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
yading@11 1186 __asm__ volatile(
yading@11 1187 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1188 "mov %4, %%"REG_b" \n\t"
yading@11 1189 "push %%"REG_BP" \n\t"
yading@11 1190 YSCALEYUV2RGB1(%%REGBP, %5)
yading@11 1191 YSCALEYUV2RGB1_ALPHA(%%REGBP)
yading@11 1192 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 1193 "pop %%"REG_BP" \n\t"
yading@11 1194 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1195 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1196 "a" (&c->redDither)
yading@11 1197 );
yading@11 1198 } else {
yading@11 1199 __asm__ volatile(
yading@11 1200 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1201 "mov %4, %%"REG_b" \n\t"
yading@11 1202 "push %%"REG_BP" \n\t"
yading@11 1203 YSCALEYUV2RGB1(%%REGBP, %5)
yading@11 1204 "pcmpeqd %%mm7, %%mm7 \n\t"
yading@11 1205 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 1206 "pop %%"REG_BP" \n\t"
yading@11 1207 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1208 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1209 "a" (&c->redDither)
yading@11 1210 );
yading@11 1211 }
yading@11 1212 } else {
yading@11 1213 const int16_t *ubuf1 = ubuf[1];
yading@11 1214 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
yading@11 1215 __asm__ volatile(
yading@11 1216 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1217 "mov %4, %%"REG_b" \n\t"
yading@11 1218 "push %%"REG_BP" \n\t"
yading@11 1219 YSCALEYUV2RGB1b(%%REGBP, %5)
yading@11 1220 YSCALEYUV2RGB1_ALPHA(%%REGBP)
yading@11 1221 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 1222 "pop %%"REG_BP" \n\t"
yading@11 1223 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1224 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1225 "a" (&c->redDither)
yading@11 1226 );
yading@11 1227 } else {
yading@11 1228 __asm__ volatile(
yading@11 1229 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1230 "mov %4, %%"REG_b" \n\t"
yading@11 1231 "push %%"REG_BP" \n\t"
yading@11 1232 YSCALEYUV2RGB1b(%%REGBP, %5)
yading@11 1233 "pcmpeqd %%mm7, %%mm7 \n\t"
yading@11 1234 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
yading@11 1235 "pop %%"REG_BP" \n\t"
yading@11 1236 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1237 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1238 "a" (&c->redDither)
yading@11 1239 );
yading@11 1240 }
yading@11 1241 }
yading@11 1242 }
yading@11 1243
/**
 * Single-luma-line conversion to packed 24-bit output via WRITEBGR24
 * (defined outside this excerpt).
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2/%3 = chroma
 * lines, %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, abuf0, dstW and y are not referenced in this body; the width is read
 * from the context (8280(%5)).
 */
yading@11 1244 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
yading@11 1245 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 1246 const int16_t *abuf0, uint8_t *dest,
yading@11 1247 int dstW, int uvalpha, int y)
yading@11 1248 {
yading@11 1249 const int16_t *ubuf0 = ubuf[0];
yading@11 1250 const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
yading@11 1251
yading@11 1252 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
yading@11 1253 const int16_t *ubuf1 = ubuf[0];
yading@11 1254 __asm__ volatile(
yading@11 1255 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1256 "mov %4, %%"REG_b" \n\t"
yading@11 1257 "push %%"REG_BP" \n\t"
yading@11 1258 YSCALEYUV2RGB1(%%REGBP, %5)
yading@11 1259 "pxor %%mm7, %%mm7 \n\t"
yading@11 1260 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
yading@11 1261 "pop %%"REG_BP" \n\t"
yading@11 1262 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1263 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1264 "a" (&c->redDither)
yading@11 1265 );
yading@11 1266 } else {
yading@11 1267 const int16_t *ubuf1 = ubuf[1];
yading@11 1268 __asm__ volatile(
yading@11 1269 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1270 "mov %4, %%"REG_b" \n\t"
yading@11 1271 "push %%"REG_BP" \n\t"
yading@11 1272 YSCALEYUV2RGB1b(%%REGBP, %5)
yading@11 1273 "pxor %%mm7, %%mm7 \n\t"
yading@11 1274 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
yading@11 1275 "pop %%"REG_BP" \n\t"
yading@11 1276 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1277 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1278 "a" (&c->redDither)
yading@11 1279 );
yading@11 1280 }
yading@11 1281 }
yading@11 1282
/**
 * Single-luma-line conversion to 15-bit output via WRITERGB15 (defined
 * outside this excerpt). When DITHER1XBPP is defined, the
 * BLUE/GREEN/RED_DITHER values from the context are added to the B/G/R bytes
 * with unsigned saturation before packing.
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2/%3 = chroma
 * lines, %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, abuf0, dstW and y are not referenced in this body; the width is read
 * from the context (8280(%5)).
 */
yading@11 1283 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
yading@11 1284 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 1285 const int16_t *abuf0, uint8_t *dest,
yading@11 1286 int dstW, int uvalpha, int y)
yading@11 1287 {
yading@11 1288 const int16_t *ubuf0 = ubuf[0];
yading@11 1289 const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
yading@11 1290
yading@11 1291 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
yading@11 1292 const int16_t *ubuf1 = ubuf[0];
yading@11 1293 __asm__ volatile(
yading@11 1294 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1295 "mov %4, %%"REG_b" \n\t"
yading@11 1296 "push %%"REG_BP" \n\t"
yading@11 1297 YSCALEYUV2RGB1(%%REGBP, %5)
yading@11 1298 "pxor %%mm7, %%mm7 \n\t"
yading@11 1299 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
yading@11 1300 #ifdef DITHER1XBPP
yading@11 1301 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
yading@11 1302 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
yading@11 1303 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
yading@11 1304 #endif
yading@11 1305 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
yading@11 1306 "pop %%"REG_BP" \n\t"
yading@11 1307 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1308 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1309 "a" (&c->redDither)
yading@11 1310 );
yading@11 1311 } else {
yading@11 1312 const int16_t *ubuf1 = ubuf[1];
yading@11 1313 __asm__ volatile(
yading@11 1314 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1315 "mov %4, %%"REG_b" \n\t"
yading@11 1316 "push %%"REG_BP" \n\t"
yading@11 1317 YSCALEYUV2RGB1b(%%REGBP, %5)
yading@11 1318 "pxor %%mm7, %%mm7 \n\t"
yading@11 1319 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
yading@11 1320 #ifdef DITHER1XBPP
yading@11 1321 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
yading@11 1322 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
yading@11 1323 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
yading@11 1324 #endif
yading@11 1325 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
yading@11 1326 "pop %%"REG_BP" \n\t"
yading@11 1327 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1328 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1329 "a" (&c->redDither)
yading@11 1330 );
yading@11 1331 }
yading@11 1332 }
yading@11 1333
/**
 * Single-luma-line conversion to 16-bit output via WRITERGB16 (defined
 * outside this excerpt). When DITHER1XBPP is defined, the
 * BLUE/GREEN/RED_DITHER values from the context are added to the B/G/R bytes
 * with unsigned saturation before packing.
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2/%3 = chroma
 * lines, %4 = dest (memory), %5 = &c->redDither.
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest, and restored at the
 * end; REG_BP is pushed, used as loop index, and popped.
 * vbuf, abuf0, dstW and y are not referenced in this body; the width is read
 * from the context (8280(%5)).
 */
yading@11 1334 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
yading@11 1335 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 1336 const int16_t *abuf0, uint8_t *dest,
yading@11 1337 int dstW, int uvalpha, int y)
yading@11 1338 {
yading@11 1339 const int16_t *ubuf0 = ubuf[0];
yading@11 1340 const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
yading@11 1341
yading@11 1342 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
yading@11 1343 const int16_t *ubuf1 = ubuf[0];
yading@11 1344 __asm__ volatile(
yading@11 1345 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1346 "mov %4, %%"REG_b" \n\t"
yading@11 1347 "push %%"REG_BP" \n\t"
yading@11 1348 YSCALEYUV2RGB1(%%REGBP, %5)
yading@11 1349 "pxor %%mm7, %%mm7 \n\t"
yading@11 1350 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
yading@11 1351 #ifdef DITHER1XBPP
yading@11 1352 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
yading@11 1353 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
yading@11 1354 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
yading@11 1355 #endif
yading@11 1356 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
yading@11 1357 "pop %%"REG_BP" \n\t"
yading@11 1358 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1359 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1360 "a" (&c->redDither)
yading@11 1361 );
yading@11 1362 } else {
yading@11 1363 const int16_t *ubuf1 = ubuf[1];
yading@11 1364 __asm__ volatile(
yading@11 1365 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
yading@11 1366 "mov %4, %%"REG_b" \n\t"
yading@11 1367 "push %%"REG_BP" \n\t"
yading@11 1368 YSCALEYUV2RGB1b(%%REGBP, %5)
yading@11 1369 "pxor %%mm7, %%mm7 \n\t"
yading@11 1370 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
yading@11 1371 #ifdef DITHER1XBPP
yading@11 1372 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
yading@11 1373 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
yading@11 1374 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
yading@11 1375 #endif
yading@11 1376 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
yading@11 1377 "pop %%"REG_BP" \n\t"
yading@11 1378 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
yading@11 1379 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1380 "a" (&c->redDither)
yading@11 1381 );
yading@11 1382 }
yading@11 1383 }
yading@11 1384
/*
 * Single-line load for packed YUV output (no blending, no colour conversion).
 *
 * Zeroes index and opens local label 1. Reads chroma only from asm operand
 * %2 (at index and at index + UV_OFF_BYTE(c)) and luma only from operand %0,
 * and shifts every word right by 7 (arithmetic).
 * Result: mm3 / mm4 = the two chroma word vectors, mm1 / mm7 = luma words,
 * which is the register layout REAL_WRITEYUY2 consumes.
 *
 * In the inline comments "eax" stands for index and "+2048" for the
 * UV_OFF_BYTE(c) offset.
 */
yading@11 1385 #define REAL_YSCALEYUV2PACKED1(index, c) \
yading@11 1386 "xor "#index", "#index" \n\t"\
yading@11 1387 ".p2align 4 \n\t"\
yading@11 1388 "1: \n\t"\
yading@11 1389 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
yading@11 1390 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1391 "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
yading@11 1392 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1393 "psraw $7, %%mm3 \n\t" \
yading@11 1394 "psraw $7, %%mm4 \n\t" \
yading@11 1395 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
yading@11 1396 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
yading@11 1397 "psraw $7, %%mm1 \n\t" \
yading@11 1398 "psraw $7, %%mm7 \n\t" \
yading@11 1399
yading@11 1400 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
yading@11 1401
/*
 * YSCALEYUV2PACKED1b(index, c): variant of YSCALEYUV2PACKED1 that reads two
 * chroma lines (%2 and %3; ubuf0 and ubuf1 in yuv2yuyv422_1) and combines
 * them. For each of the two chroma positions (index and index + the offset
 * read from UV_OFF_BYTE(c)) the word pairs are added with paddw and the sums
 * shifted right by 8, i.e. the mean of the two lines with the same 7-bit
 * down-scale the single-line variant applies.
 * Note the shift on the sums is logical (psrlw), whereas the single-line
 * variant uses an arithmetic shift (psraw).
 * The luma handling (mm1/mm7 from %0, >> 7) is identical to
 * YSCALEYUV2PACKED1. The loop-closing jump to "1:" is not part of this macro.
 */
yading@11 1402 #define REAL_YSCALEYUV2PACKED1b(index, c) \
yading@11 1403 "xor "#index", "#index" \n\t"\
yading@11 1404 ".p2align 4 \n\t"\
yading@11 1405 "1: \n\t"\
yading@11 1406 "movq (%2, "#index"), %%mm2 \n\t" /* 4 words at uvbuf0 + index bytes */\
yading@11 1407 "movq (%3, "#index"), %%mm3 \n\t" /* 4 words at uvbuf1 + index bytes */\
yading@11 1408 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1409 "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0 at index + offset read from UV_OFF_BYTE(c) */\
yading@11 1410 "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1 at index + offset read from UV_OFF_BYTE(c) */\
yading@11 1411 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
yading@11 1412 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0 + uvbuf1 at the first chroma position */\
yading@11 1413 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0 + uvbuf1 at the offset chroma position */\
yading@11 1414 "psrlw $8, %%mm3 \n\t" \
yading@11 1415 "psrlw $8, %%mm4 \n\t" \
yading@11 1416 "movq (%0, "#index", 2), %%mm1 \n\t" /* buf0[index .. index+3] */\
yading@11 1417 "movq 8(%0, "#index", 2), %%mm7 \n\t" /* buf0[index+4 .. index+7] */\
yading@11 1418 "psraw $7, %%mm1 \n\t" \
yading@11 1419 "psraw $7, %%mm7 \n\t"
yading@11 1420 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
yading@11 1421
/**
 * Packed-output writer for a single luma input line, implemented in MMX
 * inline asm. sws_init_swScale() installs it as c->yuv2packed1 when the
 * destination format is AV_PIX_FMT_YUYV422.
 *
 * @param c       scaler context; &c->redDither is passed to the asm (operand
 *                %5) and serves as the base for the ESP_OFFSET and 8280
 *                displacements
 * @param buf0    luma input line (asm operand %0)
 * @param ubuf    chroma input lines; ubuf[0] is always read, ubuf[1] only
 *                when uvalpha >= 2048
 * @param vbuf    not referenced by this function's C code or operand lists
 * @param abuf0   not referenced
 * @param dest    output pointer, loaded into REG_b for the duration of the asm
 * @param dstW    not referenced directly; the asm is handed 8280(%5) instead
 *                (presumably the width stored in the context -- TODO confirm)
 * @param uvalpha selects the path: below 2048 only ubuf[0] is used, otherwise
 *                ubuf[0] and ubuf[1] are averaged by YSCALEYUV2PACKED1b
 * @param y       not referenced
 *
 * Both asm statements save REG_b at ESP_OFFSET(%5) and push REG_BP before the
 * loop and restore them afterwards, because both registers are used as
 * scratch (dest pointer and loop index). Neither statement declares outputs
 * or clobbers.
 */
yading@11 1422 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
yading@11 1423 const int16_t *ubuf[2], const int16_t *vbuf[2],
yading@11 1424 const int16_t *abuf0, uint8_t *dest,
yading@11 1425 int dstW, int uvalpha, int y)
yading@11 1426 {
yading@11 1427 const int16_t *ubuf0 = ubuf[0];
yading@11 1428 const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
yading@11 1429
yading@11 1430 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
yading@11 1431 const int16_t *ubuf1 = ubuf[0];
yading@11 1432 __asm__ volatile(
yading@11 1433 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" /* stash REG_b relative to &c->redDither */
yading@11 1434 "mov %4, %%"REG_b" \n\t" /* REG_b = dest */
yading@11 1435 "push %%"REG_BP" \n\t" /* REG_BP becomes the loop index */
yading@11 1436 YSCALEYUV2PACKED1(%%REGBP, %5)
yading@11 1437 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
yading@11 1438 "pop %%"REG_BP" \n\t"
yading@11 1439 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" /* restore REG_b */
yading@11 1440 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1441 "a" (&c->redDither)
yading@11 1442 );
yading@11 1443 } else {
yading@11 1444 const int16_t *ubuf1 = ubuf[1];
yading@11 1445 __asm__ volatile(
yading@11 1446 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" /* stash REG_b relative to &c->redDither */
yading@11 1447 "mov %4, %%"REG_b" \n\t" /* REG_b = dest */
yading@11 1448 "push %%"REG_BP" \n\t" /* REG_BP becomes the loop index */
yading@11 1449 YSCALEYUV2PACKED1b(%%REGBP, %5)
yading@11 1450 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
yading@11 1451 "pop %%"REG_BP" \n\t"
yading@11 1452 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" /* restore REG_b */
yading@11 1453 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
yading@11 1454 "a" (&c->redDither)
yading@11 1455 );
yading@11 1456 }
yading@11 1457 }
yading@11 1458
yading@11 1459 #if COMPILE_TEMPLATE_MMXEXT
/**
 * Fast horizontal luma scaler (MMXEXT build only). It runs the code block
 * stored at c->lumMmxextFilterCode eight times via `call`, then patches the
 * right edge in C.
 *
 * @param c        scaler context; supplies hLumFilterPos, hLumFilter and
 *                 lumMmxextFilterCode
 * @param dst      output samples (int16_t)
 * @param dstWidth number of output samples
 * @param src      input pixels (uint8_t)
 * @param srcW     number of input pixels
 * @param xInc     horizontal step; the source position of output i is taken
 *                 as (i*xInc)>>16, so this appears to be 16.16 fixed point
 *
 * Register set-up before the first call: REG_c = src, REG_D = dst,
 * REG_d = filter, REG_b = filterPos, REG_a = 0, mm7 = 0.
 * What the called code does with these registers is not visible here.
 */
yading@11 1460 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
yading@11 1461 int dstWidth, const uint8_t *src,
yading@11 1462 int srcW, int xInc)
yading@11 1463 {
yading@11 1464 int32_t *filterPos = c->hLumFilterPos;
yading@11 1465 int16_t *filter = c->hLumFilter;
yading@11 1466 void *mmxextFilterCode = c->lumMmxextFilterCode;
yading@11 1467 int i;
yading@11 1468 #if defined(PIC)
yading@11 1469 uint64_t ebxsave;
yading@11 1470 #endif
yading@11 1471 #if ARCH_X86_64
yading@11 1472 uint64_t retsave;
yading@11 1473 #endif
yading@11 1474
/*
 * Prologue: with PIC, REG_b is not listed as a clobber, so it is saved to
 * ebxsave by hand. On x86-64 the word at -8(%rsp) is copied to retsave
 * because every `call` below pushes its return address into that slot;
 * presumably the compiler may keep live data there (red zone).
 * NOTE(review): ebxsave/retsave are written by the asm but are passed as
 * input "m" operands, and no "memory" clobber is declared.
 */
yading@11 1475 __asm__ volatile(
yading@11 1476 #if defined(PIC)
yading@11 1477 "mov %%"REG_b", %5 \n\t"
yading@11 1478 #if ARCH_X86_64
yading@11 1479 "mov -8(%%rsp), %%"REG_a" \n\t"
yading@11 1480 "mov %%"REG_a", %6 \n\t"
yading@11 1481 #endif
yading@11 1482 #else
yading@11 1483 #if ARCH_X86_64
yading@11 1484 "mov -8(%%rsp), %%"REG_a" \n\t"
yading@11 1485 "mov %%"REG_a", %5 \n\t"
yading@11 1486 #endif
yading@11 1487 #endif
yading@11 1488 "pxor %%mm7, %%mm7 \n\t"
yading@11 1489 "mov %0, %%"REG_c" \n\t" /* REG_c = src */
yading@11 1490 "mov %1, %%"REG_D" \n\t" /* REG_D = dst */
yading@11 1491 "mov %2, %%"REG_d" \n\t" /* REG_d = filter */
yading@11 1492 "mov %3, %%"REG_b" \n\t" /* REG_b = filterPos */
yading@11 1493 "xor %%"REG_a", %%"REG_a" \n\t" // i
yading@11 1494 PREFETCH" (%%"REG_c") \n\t"
yading@11 1495 PREFETCH" 32(%%"REG_c") \n\t"
yading@11 1496 PREFETCH" 64(%%"REG_c") \n\t"
yading@11 1497
/*
 * CALL_MMXEXT_FILTER_CODE: one invocation of the code at operand %4.
 * Before the call esi is loaded from filterPos[0]. After it, REG_c (source
 * pointer) is advanced by the 32-bit value found at filterPos + REG_a bytes,
 * REG_D (destination pointer) is advanced by REG_a bytes, and REG_a is
 * cleared for the next round. The x86-64 variant goes through esi/REG_S
 * instead of adding the 32-bit memory operand to the pointer directly.
 * The macro stays defined after this function; hcscale_fast reuses it.
 */
yading@11 1498 #if ARCH_X86_64
yading@11 1499 #define CALL_MMXEXT_FILTER_CODE \
yading@11 1500 "movl (%%"REG_b"), %%esi \n\t"\
yading@11 1501 "call *%4 \n\t"\
yading@11 1502 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
yading@11 1503 "add %%"REG_S", %%"REG_c" \n\t"\
yading@11 1504 "add %%"REG_a", %%"REG_D" \n\t"\
yading@11 1505 "xor %%"REG_a", %%"REG_a" \n\t"\
yading@11 1506
yading@11 1507 #else
yading@11 1508 #define CALL_MMXEXT_FILTER_CODE \
yading@11 1509 "movl (%%"REG_b"), %%esi \n\t"\
yading@11 1510 "call *%4 \n\t"\
yading@11 1511 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
yading@11 1512 "add %%"REG_a", %%"REG_D" \n\t"\
yading@11 1513 "xor %%"REG_a", %%"REG_a" \n\t"\
yading@11 1514
yading@11 1515 #endif /* ARCH_X86_64 */
yading@11 1516
yading@11 1517 CALL_MMXEXT_FILTER_CODE
yading@11 1518 CALL_MMXEXT_FILTER_CODE
yading@11 1519 CALL_MMXEXT_FILTER_CODE
yading@11 1520 CALL_MMXEXT_FILTER_CODE
yading@11 1521 CALL_MMXEXT_FILTER_CODE
yading@11 1522 CALL_MMXEXT_FILTER_CODE
yading@11 1523 CALL_MMXEXT_FILTER_CODE
yading@11 1524 CALL_MMXEXT_FILTER_CODE
yading@11 1525
/* Epilogue: undo the prologue saves (REG_b and the word at -8(%rsp)). */
yading@11 1526 #if defined(PIC)
yading@11 1527 "mov %5, %%"REG_b" \n\t"
yading@11 1528 #if ARCH_X86_64
yading@11 1529 "mov %6, %%"REG_a" \n\t"
yading@11 1530 "mov %%"REG_a", -8(%%rsp) \n\t"
yading@11 1531 #endif
yading@11 1532 #else
yading@11 1533 #if ARCH_X86_64
yading@11 1534 "mov %5, %%"REG_a" \n\t"
yading@11 1535 "mov %%"REG_a", -8(%%rsp) \n\t"
yading@11 1536 #endif
yading@11 1537 #endif
yading@11 1538 :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
yading@11 1539 "m" (mmxextFilterCode)
yading@11 1540 #if defined(PIC)
yading@11 1541 ,"m" (ebxsave)
yading@11 1542 #endif
yading@11 1543 #if ARCH_X86_64
yading@11 1544 ,"m"(retsave)
yading@11 1545 #endif
yading@11 1546 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
yading@11 1547 #if !defined(PIC)
yading@11 1548 ,"%"REG_b
yading@11 1549 #endif
yading@11 1550 );
yading@11 1551
/*
 * Right-edge fix-up: walking back from the last output sample, every sample
 * whose source position (i*xInc)>>16 is at or beyond the last input pixel is
 * overwritten with that last pixel multiplied by 128.
 */
yading@11 1552 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
yading@11 1553 dst[i] = src[srcW-1]*128;
yading@11 1554 }
yading@11 1555
/**
 * Fast horizontal chroma scaler (MMXEXT build only). It makes two passes
 * over the code block stored at c->chrMmxextFilterCode: four calls for
 * src1 -> dst1, then four calls for src2 -> dst2, followed by a right-edge
 * fix-up in C for both planes.
 *
 * @param c        scaler context; supplies hChrFilterPos, hChrFilter and
 *                 chrMmxextFilterCode
 * @param dst1     output samples for the first plane
 * @param dst2     output samples for the second plane
 * @param dstWidth number of output samples per plane
 * @param src1     input pixels of the first plane
 * @param src2     input pixels of the second plane
 * @param srcW     number of input pixels per plane
 * @param xInc     horizontal step; the source position of output i is taken
 *                 as (i*xInc)>>16, so this appears to be 16.16 fixed point
 *
 * Uses CALL_MMXEXT_FILTER_CODE, which is #defined inside hyscale_fast's asm
 * statement earlier in this file. REG_b and, on x86-64, the word at
 * -8(%rsp) are saved and restored by hand, as in hyscale_fast.
 * NOTE(review): ebxsave/retsave are written by the asm but are passed as
 * input "m" operands, and no "memory" clobber is declared.
 */
yading@11 1556 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
yading@11 1557 int dstWidth, const uint8_t *src1,
yading@11 1558 const uint8_t *src2, int srcW, int xInc)
yading@11 1559 {
yading@11 1560 int32_t *filterPos = c->hChrFilterPos;
yading@11 1561 int16_t *filter = c->hChrFilter;
yading@11 1562 void *mmxextFilterCode = c->chrMmxextFilterCode;
yading@11 1563 int i;
yading@11 1564 #if defined(PIC)
yading@11 1565 DECLARE_ALIGNED(8, uint64_t, ebxsave);
yading@11 1566 #endif
yading@11 1567 #if ARCH_X86_64
yading@11 1568 DECLARE_ALIGNED(8, uint64_t, retsave);
yading@11 1569 #endif
yading@11 1570
yading@11 1571 __asm__ volatile(
yading@11 1572 #if defined(PIC)
yading@11 1573 "mov %%"REG_b", %7 \n\t"
yading@11 1574 #if ARCH_X86_64
yading@11 1575 "mov -8(%%rsp), %%"REG_a" \n\t"
yading@11 1576 "mov %%"REG_a", %8 \n\t"
yading@11 1577 #endif
yading@11 1578 #else
yading@11 1579 #if ARCH_X86_64
yading@11 1580 "mov -8(%%rsp), %%"REG_a" \n\t"
yading@11 1581 "mov %%"REG_a", %7 \n\t"
yading@11 1582 #endif
yading@11 1583 #endif
yading@11 1584 "pxor %%mm7, %%mm7 \n\t"
yading@11 1585 "mov %0, %%"REG_c" \n\t" /* REG_c = src1 */
yading@11 1586 "mov %1, %%"REG_D" \n\t" /* REG_D = dst1 */
yading@11 1587 "mov %2, %%"REG_d" \n\t" /* REG_d = filter */
yading@11 1588 "mov %3, %%"REG_b" \n\t" /* REG_b = filterPos */
yading@11 1589 "xor %%"REG_a", %%"REG_a" \n\t" // i
yading@11 1590 PREFETCH" (%%"REG_c") \n\t"
yading@11 1591 PREFETCH" 32(%%"REG_c") \n\t"
yading@11 1592 PREFETCH" 64(%%"REG_c") \n\t"
yading@11 1593
/* First pass: src1 -> dst1. */
yading@11 1594 CALL_MMXEXT_FILTER_CODE
yading@11 1595 CALL_MMXEXT_FILTER_CODE
yading@11 1596 CALL_MMXEXT_FILTER_CODE
yading@11 1597 CALL_MMXEXT_FILTER_CODE
/* Second pass: reload the pointers for src2 -> dst2; REG_d and REG_b are not reloaded. */
yading@11 1598 "xor %%"REG_a", %%"REG_a" \n\t" // i
yading@11 1599 "mov %5, %%"REG_c" \n\t" // src2
yading@11 1600 "mov %6, %%"REG_D" \n\t" // dst2
yading@11 1601 PREFETCH" (%%"REG_c") \n\t"
yading@11 1602 PREFETCH" 32(%%"REG_c") \n\t"
yading@11 1603 PREFETCH" 64(%%"REG_c") \n\t"
yading@11 1604
yading@11 1605 CALL_MMXEXT_FILTER_CODE
yading@11 1606 CALL_MMXEXT_FILTER_CODE
yading@11 1607 CALL_MMXEXT_FILTER_CODE
yading@11 1608 CALL_MMXEXT_FILTER_CODE
yading@11 1609
/* Epilogue: restore REG_b and the word at -8(%rsp) saved at the top. */
yading@11 1610 #if defined(PIC)
yading@11 1611 "mov %7, %%"REG_b" \n\t"
yading@11 1612 #if ARCH_X86_64
yading@11 1613 "mov %8, %%"REG_a" \n\t"
yading@11 1614 "mov %%"REG_a", -8(%%rsp) \n\t"
yading@11 1615 #endif
yading@11 1616 #else
yading@11 1617 #if ARCH_X86_64
yading@11 1618 "mov %7, %%"REG_a" \n\t"
yading@11 1619 "mov %%"REG_a", -8(%%rsp) \n\t"
yading@11 1620 #endif
yading@11 1621 #endif
yading@11 1622 :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
yading@11 1623 "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
yading@11 1624 #if defined(PIC)
yading@11 1625 ,"m" (ebxsave)
yading@11 1626 #endif
yading@11 1627 #if ARCH_X86_64
yading@11 1628 ,"m"(retsave)
yading@11 1629 #endif
yading@11 1630 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
yading@11 1631 #if !defined(PIC)
yading@11 1632 ,"%"REG_b
yading@11 1633 #endif
yading@11 1634 );
yading@11 1635
/*
 * Right-edge fix-up for both planes: outputs whose source position
 * (i*xInc)>>16 is at or beyond the last input pixel get that last pixel
 * multiplied by 128.
 */
yading@11 1636 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
yading@11 1637 dst1[i] = src1[srcW-1]*128;
yading@11 1638 dst2[i] = src2[srcW-1]*128;
yading@11 1639 }
yading@11 1640 }
yading@11 1641 #endif /* COMPILE_TEMPLATE_MMXEXT */
yading@11 1642
/**
 * Install this template instance's function pointers into the scaler context.
 *
 * Vertical/output stage (skipped entirely when the destination format is one
 * for which is16BPS() or is9_OR_10BPS() is true, is NV12 or NV21, or when
 * SWS_BITEXACT is set):
 *  - with SWS_ACCURATE_RND only yuv2packedX is set, to the "_ar" variants;
 *  - otherwise use_mmx_vfilter is set to 1, yuv2planeX to yuv2yuvX, and
 *    yuv2packedX to the plain "_X" variants;
 *  - yuv2packed1 / yuv2packed2 are then set per destination format.
 * All packed writers are installed only when SWS_FULL_CHR_H_INT is clear and
 * only for RGB32, BGR24, RGB555, RGB565 and YUYV422; for other formats the
 * existing pointers are left untouched.
 *
 * Horizontal stage (only when srcBpc == 8 and dstBpc <= 14): in the MMXEXT
 * build hyscale_fast / hcscale_fast point at this file's implementations if
 * SWS_FAST_BILINEAR is set and c->canMMXEXTBeUsed is non-zero; in every other
 * case they are set to NULL.
 *
 * @param c scaler context to configure; reads flags, dstFormat, srcBpc,
 *          dstBpc and canMMXEXTBeUsed
 */
yading@11 1643 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
yading@11 1644 {
yading@11 1645 enum AVPixelFormat dstFormat = c->dstFormat;
yading@11 1646
yading@11 1647 c->use_mmx_vfilter= 0;
yading@11 1648 if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12
yading@11 1649 && dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
yading@11 1650 if (c->flags & SWS_ACCURATE_RND) {
yading@11 1651 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
yading@11 1652 switch (c->dstFormat) {
yading@11 1653 case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
yading@11 1654 case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
yading@11 1655 case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
yading@11 1656 case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
yading@11 1657 case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
yading@11 1658 default: break;
yading@11 1659 }
yading@11 1660 }
yading@11 1661 } else {
yading@11 1662 c->use_mmx_vfilter= 1; /* only set on the non-ACCURATE_RND path */
yading@11 1663 c->yuv2planeX = RENAME(yuv2yuvX );
yading@11 1664 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
yading@11 1665 switch (c->dstFormat) {
yading@11 1666 case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
yading@11 1667 case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
yading@11 1668 case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
yading@11 1669 case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
yading@11 1670 case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
yading@11 1671 default: break;
yading@11 1672 }
yading@11 1673 }
yading@11 1674 }
/* The 1-line and 2-line packed writers are chosen the same way on both rounding paths. */
yading@11 1675 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
yading@11 1676 switch (c->dstFormat) {
yading@11 1677 case AV_PIX_FMT_RGB32:
yading@11 1678 c->yuv2packed1 = RENAME(yuv2rgb32_1);
yading@11 1679 c->yuv2packed2 = RENAME(yuv2rgb32_2);
yading@11 1680 break;
yading@11 1681 case AV_PIX_FMT_BGR24:
yading@11 1682 c->yuv2packed1 = RENAME(yuv2bgr24_1);
yading@11 1683 c->yuv2packed2 = RENAME(yuv2bgr24_2);
yading@11 1684 break;
yading@11 1685 case AV_PIX_FMT_RGB555:
yading@11 1686 c->yuv2packed1 = RENAME(yuv2rgb555_1);
yading@11 1687 c->yuv2packed2 = RENAME(yuv2rgb555_2);
yading@11 1688 break;
yading@11 1689 case AV_PIX_FMT_RGB565:
yading@11 1690 c->yuv2packed1 = RENAME(yuv2rgb565_1);
yading@11 1691 c->yuv2packed2 = RENAME(yuv2rgb565_2);
yading@11 1692 break;
yading@11 1693 case AV_PIX_FMT_YUYV422:
yading@11 1694 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
yading@11 1695 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
yading@11 1696 break;
yading@11 1697 default:
yading@11 1698 break;
yading@11 1699 }
yading@11 1700 }
yading@11 1701 }
yading@11 1702
yading@11 1703 if (c->srcBpc == 8 && c->dstBpc <= 14) {
yading@11 1704 // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
/* In a non-MMXEXT build the preprocessor drops the if/else and the NULL assignments become unconditional. */
yading@11 1705 #if COMPILE_TEMPLATE_MMXEXT
yading@11 1706 if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
yading@11 1707 c->hyscale_fast = RENAME(hyscale_fast);
yading@11 1708 c->hcscale_fast = RENAME(hcscale_fast);
yading@11 1709 } else {
yading@11 1710 #endif /* COMPILE_TEMPLATE_MMXEXT */
yading@11 1711 c->hyscale_fast = NULL; /* presumably makes callers pick another horizontal scaler -- confirm */
yading@11 1712 c->hcscale_fast = NULL;
yading@11 1713 #if COMPILE_TEMPLATE_MMXEXT
yading@11 1714 }
yading@11 1715 #endif /* COMPILE_TEMPLATE_MMXEXT */
yading@11 1716 }
yading@11 1717 }