yading@10: /* yading@10: * Copyright (C) 2010 David Conrad yading@10: * yading@10: * This file is part of FFmpeg. yading@10: * yading@10: * FFmpeg is free software; you can redistribute it and/or yading@10: * modify it under the terms of the GNU Lesser General Public yading@10: * License as published by the Free Software Foundation; either yading@10: * version 2.1 of the License, or (at your option) any later version. yading@10: * yading@10: * FFmpeg is distributed in the hope that it will be useful, yading@10: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@10: * Lesser General Public License for more details. yading@10: * yading@10: * You should have received a copy of the GNU Lesser General Public yading@10: * License along with FFmpeg; if not, write to the Free Software yading@10: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@10: */ yading@10: yading@10: #include "dsputil_mmx.h" yading@10: #include "diracdsp_mmx.h" yading@10: yading@10: void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); yading@10: void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); yading@10: void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); yading@10: void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); yading@10: yading@10: #define HPEL_FILTER(MMSIZE, EXT) \ yading@10: void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \ yading@10: void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \ yading@10: \ yading@10: static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \ yading@10: const uint8_t *src, int stride, int width, int height) \ yading@10: { \ yading@10: while( height-- ) \ yading@10: { \ yading@10: ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); \ yading@10: ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \ yading@10: ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \ yading@10: \ yading@10: dsth += stride; \ yading@10: dstv += stride; \ yading@10: dstc += stride; \ yading@10: src += stride; \ yading@10: } \ yading@10: } yading@10: yading@10: #if !ARCH_X86_64 yading@10: HPEL_FILTER(8, mmx) yading@10: #endif yading@10: HPEL_FILTER(16, sse2) yading@10: yading@10: #define PIXFUNC(PFX, IDX, EXT) \ yading@10: /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \ yading@10: c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \ yading@10: c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT yading@10: yading@10: void ff_diracdsp_init_mmx(DiracDSPContext* c) yading@10: { yading@10: int mm_flags = av_get_cpu_flags(); yading@10: yading@10: if (!(mm_flags & AV_CPU_FLAG_MMX)) yading@10: return; yading@10: yading@10: #if HAVE_YASM yading@10: c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; yading@10: #if !ARCH_X86_64 yading@10: c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx; yading@10: c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx; yading@10: c->dirac_hpel_filter = dirac_hpel_filter_mmx; yading@10: c->add_rect_clamped = ff_add_rect_clamped_mmx; yading@10: c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx; yading@10: #endif yading@10: #endif yading@10: yading@10: #if HAVE_MMX_INLINE yading@10: PIXFUNC(put, 0, mmx); yading@10: PIXFUNC(avg, 0, mmx); yading@10: #endif yading@10: yading@10: #if HAVE_MMXEXT_INLINE yading@10: if (mm_flags & AV_CPU_FLAG_MMX2) { yading@10: PIXFUNC(avg, 0, mmxext); yading@10: } yading@10: #endif yading@10: yading@10: if (mm_flags & AV_CPU_FLAG_SSE2) { yading@10: #if HAVE_YASM yading@10: c->dirac_hpel_filter = dirac_hpel_filter_sse2; yading@10: c->add_rect_clamped = ff_add_rect_clamped_sse2; yading@10: c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2; yading@10: yading@10: c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; yading@10: c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; yading@10: #endif yading@10: #if HAVE_SSE2_INLINE yading@10: c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2; yading@10: c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2; yading@10: c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; yading@10: c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; yading@10: #endif yading@10: } yading@10: }