;******************************************************************************
;* MMX optimized DSP utils
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

INIT_MMX mmxext
;------------------------------------------------------------------------------
; void pixels(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; PIXELS48 <op>, <width>
;   %1  "put" (plain copy) or "avg" (pavgb of source with what is already in
;       the destination, result written back to the destination)
;   %2  4 or 8; selects movh (width 4) or mova (anything else) as the
;       load/store instruction.  Both are macros from x86util.asm.
;
; Registers, following the prototype above:
;   r0 = block (destination), r1 = pixels (source), r2 = line_size, r3d = h
;   r4 = 3 * line_size (scratch)
;
; Four rows are handled per iteration and the loop ends only when the row
; counter reaches exactly zero, so h has to be a positive multiple of 4.
;
; NOTE(review): r2 is sign-extended from its low 32 bits here although the
; prototype says ptrdiff_t, and the SSE2 functions below do not do this --
; confirm against the C declarations which of the two is intended.
;------------------------------------------------------------------------------
%macro PIXELS48 2
%if %2 == 4
%define OP movh
%else
%define OP mova
%endif
cglobal %1_pixels%2, 4,5
    movsxdifnidn r2, r2d
    lea          r4, [r2+r2*2]          ; r4 = 3 * line_size
.next_rows:
    ; fetch four consecutive source rows
    OP           m0, [r1]
    OP           m1, [r1+r2]
    OP           m2, [r1+r2*2]
    OP           m3, [r1+r4]
%ifidn %1, avg
    ; blend with the current destination contents (per-byte rounded average)
    pavgb        m0, [r0]
    pavgb        m1, [r0+r2]
    pavgb        m2, [r0+r2*2]
    pavgb        m3, [r0+r4]
%endif
    ; write the four rows back
    OP         [r0], m0
    OP      [r0+r2], m1
    OP    [r0+r2*2], m2
    OP      [r0+r4], m3
    ; advance both pointers by four rows (lea leaves the flags alone)
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4                  ; four rows done; ZF set when none remain
    jnz .next_rows
    RET
%endmacro

PIXELS48 put, 4
PIXELS48 avg, 4
PIXELS48 put, 8
PIXELS48 avg, 8


INIT_XMM sse2
;------------------------------------------------------------------------------
; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Copies h rows from pixels to block, four rows per iteration, using one XMM
; register per row.  Source rows are read with movu, destination rows are
; written with mova.
; NOTE(review): mova is presumably the aligned move from x86inc, which would
; require block and line_size to keep every row 16-byte aligned -- confirm.
;
;   r0 = block, r1 = pixels, r2 = line_size, r3d = h (positive multiple of 4,
;   since the loop exits only on an exact zero), r4 = 3 * line_size
;------------------------------------------------------------------------------
cglobal put_pixels16, 4,5,4
    lea          r4, [r2+r2*2]          ; r4 = 3 * line_size
.next_rows:
    ; load four source rows
    movu         m0, [r1]
    movu         m1, [r1+r2]
    movu         m2, [r1+r2*2]
    movu         m3, [r1+r4]
    ; store them to the destination
    mova       [r0], m0
    mova    [r0+r2], m1
    mova  [r0+r2*2], m2
    mova    [r0+r4], m3
    ; step both pointers down four rows (lea does not touch the flags)
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4
    jnz .next_rows
    REP_RET

;------------------------------------------------------------------------------
; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Same row walk as put_pixels16 above, but each source row is combined with
; the existing destination row through pavgb (per-byte rounded average) before
; being stored back to block.
; NOTE(review): the pavgb memory operands and the mova stores presumably need
; 16-byte aligned destination rows -- confirm against the callers.
;
;   r0 = block, r1 = pixels, r2 = line_size, r3d = h (positive multiple of 4),
;   r4 = 3 * line_size
;------------------------------------------------------------------------------
cglobal avg_pixels16, 4,5,4
    lea          r4, [r2+r2*2]          ; r4 = 3 * line_size
.next_rows:
    ; load four source rows
    movu         m0, [r1]
    movu         m1, [r1+r2]
    movu         m2, [r1+r2*2]
    movu         m3, [r1+r4]
    ; average with what the destination already holds
    pavgb        m0, [r0]
    pavgb        m1, [r0+r2]
    pavgb        m2, [r0+r2*2]
    pavgb        m3, [r0+r4]
    ; store the blended rows
    mova       [r0], m0
    mova    [r0+r2], m1
    mova  [r0+r2*2], m2
    mova    [r0+r4], m3
    ; step both pointers down four rows (lea does not touch the flags)
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4
    jnz .next_rows
    REP_RET