/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 * Copyright (c) 2011 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264qpel.h"
#include "libavcodec/mpegvideo.h"
#include "dsputil_mmx.h"

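/* Everything inside the HAVE_YASM block below is C glue around external
 * assembly kernels: the ff_* prototypes are implemented in yasm, and the
 * macros that follow combine those 4- and 8-pixel-wide kernels into the
 * full set of quarter-pel motion-compensation functions that
 * ff_h264qpel_init_x86() registers at the end of this file. */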
#if HAVE_YASM
void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
{
    ff_put_pixels8_mmxext(block, pixels, line_size, h);
    ff_put_pixels8_mmxext(block + 8, pixels + 8, line_size, h);
}
static void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
{
    ff_avg_pixels8_mmxext(block, pixels, line_size, h);
    ff_avg_pixels8_mmxext(block + 8, pixels + 8, line_size, h);
}
void ff_put_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
#define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext
#define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext
#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
#define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext

#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);\
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_op_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h);\
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(uint8_t *src, int16_t *tmp, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_mmxext(uint8_t *src, int16_t *tmp, int srcStride, int size);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(uint8_t *src, int16_t *tmp, int srcStride, int size);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h);\
void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h);

DEF_QPEL(avg)
DEF_QPEL(put)

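/* QPEL_H264 builds the C-level lowpass wrappers for one operation (put or
 * avg). The h/v/hv variants apply the 6-tap H.264 interpolation filter
 * horizontally, vertically, or in both directions through a 16-bit
 * intermediate buffer; the underlying asm kernels handle 4 (mmxext) or
 * 8 (sse2) columns per call, so these wrappers walk larger blocks in 4- or
 * 8-wide strips. */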
#define QPEL_H264(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    int w=3;\
    src -= 2*srcStride+2;\
    while(w--){\
        ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
        tmp += 4;\
        src += 4;\
    }\
    tmp -= 3*4;\
    ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    src -= 2*srcStride;\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\
    src += 4;\
    dst += 4;\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\
    int w = (size+8)>>2;\
    src -= 2*srcStride+2;\
    while(w--){\
        ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_op_mmxext(src, tmp, srcStride, size);\
        tmp += 4;\
        src += 4;\
    }\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
    int w = size>>4;\
    do{\
        ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\
        tmp += 8;\
        dst += 8;\
    }while(w--);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src += 8*dstStride;\
    dst += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
    ff_put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\
    ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\
}\
\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
}\
\
static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
    ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\
    ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
}\


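/* On x86_64 the 16-wide ssse3 h_lowpass_l2 kernels are provided directly by
 * the assembly, so QPEL_H264_H16_XMM expands to nothing and only the
 * prototypes are needed; on x86_32 the 16-wide version is synthesized from
 * two 8-wide calls instead. */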
#if ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\

void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);

#else // ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src += 8*dstStride;\
    dst += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
}
#endif // ARCH_X86_64

#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\

#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}

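/* The centre (hv) positions are computed in two passes: hv1 runs the
 * vertical filter into the int16_t tmp buffer, hv2 then filters tmp
 * horizontally and rounds back down to 8 bits. The sse2 path gets its own
 * 8-wide first pass below, while its second pass is aliased to the mmxext
 * one by the #defines further down. */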
static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
    int w = (size+8)>>3;
    src -= 2*srcStride+2;
    while(w--){
        ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
        tmp += 8;
        src += 8;
    }
}

#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
    ff_put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
    ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
}\

#define ff_put_h264_qpel8_h_lowpass_l2_sse2 ff_put_h264_qpel8_h_lowpass_l2_mmxext
#define ff_avg_h264_qpel8_h_lowpass_l2_sse2 ff_avg_h264_qpel8_h_lowpass_l2_mmxext
#define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext
#define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext

#define ff_put_h264_qpel8_v_lowpass_ssse3 ff_put_h264_qpel8_v_lowpass_sse2
#define ff_avg_h264_qpel8_v_lowpass_ssse3 ff_avg_h264_qpel8_v_lowpass_sse2
#define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2
#define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2

#define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
#define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext

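/* H264_MC expands to the 16 quarter-pel cases for one block size. The mcXY
 * suffix encodes the fractional position: X is the horizontal and Y the
 * vertical quarter-pel offset, so mc00 is a plain copy, mc20/mc02/mc22 are
 * the half-pel positions, and the remaining cases average a filtered plane
 * with a neighbouring plane via the *_l2 helpers, as the H.264 interpolation
 * rules require. */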
#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\

static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}
static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext

#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
}\

#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
}\

#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
}\

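/* The hv cases below share one on-stack scratch buffer: its first SIZE*SIZE
 * bytes hold the 8-bit centre half-pel plane (halfHV), the rest holds the
 * 16-bit vertically filtered intermediate (halfV). mc12/mc32 reuse that
 * intermediate directly, letting *_l2_shift5 scale it back to 8 bits and
 * average it with halfHV instead of running a separate vertical pass. */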
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((int)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((int)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((int)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((int)temp & 7) == 0);\
    ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
}\

#define H264_MC_4816(MMX)\
H264_MC(put_, 4, MMX, 8)\
H264_MC(put_, 8, MMX, 8)\
H264_MC(put_, 16,MMX, 8)\
H264_MC(avg_, 4, MMX, 8)\
H264_MC(avg_, 8, MMX, 8)\
H264_MC(avg_, 16,MMX, 8)\

#define H264_MC_816(QPEL, XMM)\
QPEL(put_, 8, XMM, 16)\
QPEL(put_, 16,XMM, 16)\
QPEL(avg_, 8, XMM, 16)\
QPEL(avg_, 16,XMM, 16)\

QPEL_H264(put_, PUT_OP, mmxext)
QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
QPEL_H264_V_XMM(put_, PUT_OP, sse2)
QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)

H264_MC_4816(mmxext)
H264_MC_816(H264_MC_V, sse2)
H264_MC_816(H264_MC_HV, sse2)
H264_MC_816(H264_MC_H, ssse3)
H264_MC_816(H264_MC_HV, ssse3)


//10bit
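/* Prototypes for the 10-bit (high bit depth) assembly. These functions
 * operate on 16-bit samples, so pointer offsets and strides are in bytes
 * rather than pixels; the _cache64 variants are the versions tuned for
 * 64-byte cache lines, avoiding loads that straddle a cache-line boundary. */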
#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
    (uint8_t *dst, uint8_t *src, ptrdiff_t stride);

#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)

#define LUMA_MC_816(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)

LUMA_MC_ALL(10, mc00, mmxext)
LUMA_MC_ALL(10, mc10, mmxext)
LUMA_MC_ALL(10, mc20, mmxext)
LUMA_MC_ALL(10, mc30, mmxext)
LUMA_MC_ALL(10, mc01, mmxext)
LUMA_MC_ALL(10, mc11, mmxext)
LUMA_MC_ALL(10, mc21, mmxext)
LUMA_MC_ALL(10, mc31, mmxext)
LUMA_MC_ALL(10, mc02, mmxext)
LUMA_MC_ALL(10, mc12, mmxext)
LUMA_MC_ALL(10, mc22, mmxext)
LUMA_MC_ALL(10, mc32, mmxext)
LUMA_MC_ALL(10, mc03, mmxext)
LUMA_MC_ALL(10, mc13, mmxext)
LUMA_MC_ALL(10, mc23, mmxext)
LUMA_MC_ALL(10, mc33, mmxext)

LUMA_MC_816(10, mc00, sse2)
LUMA_MC_816(10, mc10, sse2)
LUMA_MC_816(10, mc10, sse2_cache64)
LUMA_MC_816(10, mc10, ssse3_cache64)
LUMA_MC_816(10, mc20, sse2)
LUMA_MC_816(10, mc20, sse2_cache64)
LUMA_MC_816(10, mc20, ssse3_cache64)
LUMA_MC_816(10, mc30, sse2)
LUMA_MC_816(10, mc30, sse2_cache64)
LUMA_MC_816(10, mc30, ssse3_cache64)
LUMA_MC_816(10, mc01, sse2)
LUMA_MC_816(10, mc11, sse2)
LUMA_MC_816(10, mc21, sse2)
LUMA_MC_816(10, mc31, sse2)
LUMA_MC_816(10, mc02, sse2)
LUMA_MC_816(10, mc12, sse2)
LUMA_MC_816(10, mc22, sse2)
LUMA_MC_816(10, mc32, sse2)
LUMA_MC_816(10, mc03, sse2)
LUMA_MC_816(10, mc13, sse2)
LUMA_MC_816(10, mc23, sse2)
LUMA_MC_816(10, mc33, sse2)

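/* The 16x16 10-bit mmxext functions are built from four 8x8 assembly calls;
 * the +16 byte offset is 8 pixels at 2 bytes each. These wrappers are
 * compiled on x86_32 only (see the QPEL16 instantiation below and the
 * registration in ff_h264qpel_init_x86()). */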
#define QPEL16_OPMC(OP, MC, MMX)\
void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride){\
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
    src += 8*stride;\
    dst += 8*stride;\
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
}

#define QPEL16_OP(MC, MMX)\
QPEL16_OPMC(put, MC, MMX)\
QPEL16_OPMC(avg, MC, MMX)

#define QPEL16(MMX)\
QPEL16_OP(mc00, MMX)\
QPEL16_OP(mc01, MMX)\
QPEL16_OP(mc02, MMX)\
QPEL16_OP(mc03, MMX)\
QPEL16_OP(mc10, MMX)\
QPEL16_OP(mc11, MMX)\
QPEL16_OP(mc12, MMX)\
QPEL16_OP(mc13, MMX)\
QPEL16_OP(mc20, MMX)\
QPEL16_OP(mc21, MMX)\
QPEL16_OP(mc22, MMX)\
QPEL16_OP(mc23, MMX)\
QPEL16_OP(mc30, MMX)\
QPEL16_OP(mc31, MMX)\
QPEL16_OP(mc32, MMX)\
QPEL16_OP(mc33, MMX)

#if ARCH_X86_32 && HAVE_YASM && CONFIG_H264QPEL // ARCH_X86_64 implies SSE2+
QPEL16(mmxext)
#endif

#endif /* HAVE_YASM */

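/* Function pointer table setup. The qpel tables are indexed by block size
 * ([0] = 16x16, [1] = 8x8, [2] = 4x4) and by fractional position, laid out
 * as x + 4*y for a quarter-pel offset of (x, y). SET_QPEL_FUNCS fills all
 * 16 positions of one table at once; H264_QPEL_FUNCS and H264_QPEL_FUNCS_10
 * override a single position in the 16x16 and 8x8 tables. */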
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
    do { \
        c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
    } while (0)

#define H264_QPEL_FUNCS(x, y, CPU) \
    do { \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \
    } while (0)

#define H264_QPEL_FUNCS_10(x, y, CPU) \
    do { \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
    } while (0)

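/* Runtime dispatch: each EXTERNAL_* block below overwrites entries installed
 * by the previous one, so the strongest instruction set reported by the CPU
 * ends up in the tables. The AVX block adds no AVX code; it re-registers the
 * plain sse2 10-bit functions over the _cache64 ones, relying on the
 * cache-line behaviour described in the comment inside that block. */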
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
{
#if HAVE_YASM
    int high_bit_depth = bit_depth > 8;
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(mm_flags)) {
        if (!high_bit_depth) {
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, );
            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, );
            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
        } else if (bit_depth == 10) {
#if ARCH_X86_32
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_);
#endif
            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
        }
    }

    if (EXTERNAL_SSE2(mm_flags)) {
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
            // these functions are slower than mmx on AMD, but faster on Intel
            H264_QPEL_FUNCS(0, 0, sse2);
        }

        if (!high_bit_depth) {
            H264_QPEL_FUNCS(0, 1, sse2);
            H264_QPEL_FUNCS(0, 2, sse2);
            H264_QPEL_FUNCS(0, 3, sse2);
            H264_QPEL_FUNCS(1, 1, sse2);
            H264_QPEL_FUNCS(1, 2, sse2);
            H264_QPEL_FUNCS(1, 3, sse2);
            H264_QPEL_FUNCS(2, 1, sse2);
            H264_QPEL_FUNCS(2, 2, sse2);
            H264_QPEL_FUNCS(2, 3, sse2);
            H264_QPEL_FUNCS(3, 1, sse2);
            H264_QPEL_FUNCS(3, 2, sse2);
            H264_QPEL_FUNCS(3, 3, sse2);
        }

        if (bit_depth == 10) {
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
            H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
            H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
            H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
        }
    }

    if (EXTERNAL_SSSE3(mm_flags)) {
        if (!high_bit_depth) {
            H264_QPEL_FUNCS(1, 0, ssse3);
            H264_QPEL_FUNCS(1, 1, ssse3);
            H264_QPEL_FUNCS(1, 2, ssse3);
            H264_QPEL_FUNCS(1, 3, ssse3);
            H264_QPEL_FUNCS(2, 0, ssse3);
            H264_QPEL_FUNCS(2, 1, ssse3);
            H264_QPEL_FUNCS(2, 2, ssse3);
            H264_QPEL_FUNCS(2, 3, ssse3);
            H264_QPEL_FUNCS(3, 0, ssse3);
            H264_QPEL_FUNCS(3, 1, ssse3);
            H264_QPEL_FUNCS(3, 2, ssse3);
            H264_QPEL_FUNCS(3, 3, ssse3);
        }

        if (bit_depth == 10) {
            H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
            H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
            H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
        }
    }

    if (EXTERNAL_AVX(mm_flags)) {
        /* AVX implies 64 byte cache lines without the need to avoid unaligned
         * memory accesses that cross the boundary between two cache lines.
         * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
         * having to treat SSE2 functions with such properties as AVX. */
        if (bit_depth == 10) {
            H264_QPEL_FUNCS_10(1, 0, sse2);
            H264_QPEL_FUNCS_10(2, 0, sse2);
            H264_QPEL_FUNCS_10(3, 0, sse2);
        }
    }
#endif
}