annotate ffmpeg/libavcodec/x86/dirac_dwt.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * MMX optimized discrete wavelet transform
yading@10 3 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
yading@10 4 * Copyright (c) 2010 David Conrad
yading@10 5 *
yading@10 6 * This file is part of FFmpeg.
yading@10 7 *
yading@10 8 * FFmpeg is free software; you can redistribute it and/or
yading@10 9 * modify it under the terms of the GNU Lesser General Public
yading@10 10 * License as published by the Free Software Foundation; either
yading@10 11 * version 2.1 of the License, or (at your option) any later version.
yading@10 12 *
yading@10 13 * FFmpeg is distributed in the hope that it will be useful,
yading@10 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 16 * Lesser General Public License for more details.
yading@10 17 *
yading@10 18 * You should have received a copy of the GNU Lesser General Public
yading@10 19 * License along with FFmpeg; if not, write to the Free Software
yading@10 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 21 */
yading@10 22
yading@10 23 #include "libavutil/x86/asm.h"
yading@10 24 #include "dsputil_mmx.h"
yading@10 25 #include "dirac_dwt.h"
yading@10 26
/**
 * Declare the external ff_*<ext> compose routines for one instruction-set
 * suffix and define static C wrappers around them.
 *
 * Despite the name, this generates wrappers for both the vertical compose
 * steps and the two horizontal Haar compose steps.
 *
 * Vertical wrappers: the trailing elements [width rounded down to a multiple
 * of align, width) are computed in C with the matching COMPOSE_* macro, then
 * the ff_* routine is called with the rounded-down width.
 *
 * Horizontal wrappers: the ff_* routine is called with the full width w, then
 * the output pairs for x in [w/2 rounded down to a multiple of align, w/2)
 * are written in C. The C loop reads tmp[x] after the call, so it presumably
 * relies on the ff_* routine having filled tmp[] for the whole half-width --
 * NOTE(review): confirm against the assembly.
 *
 * @param ext   suffix pasted onto every generated function name
 * @param align element granularity used for rounding down; it is applied as a
 *              bit mask, so it must be a power of two
 */
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w); \
void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w); \
\
/* Updates b1[] in place from b0[], b1[], b2[]. */ \
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
\
    ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
} \
\
/* Updates b1[] in place from b0[], b1[], b2[]. */ \
static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
\
    ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
} \
\
/* Updates b2[] in place from the five rows b0[]..b4[]. */ \
static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                           IDWTELEM *b3, IDWTELEM *b4, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
    ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
} \
\
/* Updates b2[] in place from the five rows b0[]..b4[]. */ \
static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                          IDWTELEM *b3, IDWTELEM *b4, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
    ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
} \
\
/* Updates both b0[] and b1[] in place. */ \
static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) { \
        /* Order matters: the b1 update uses the freshly written b0[i]. */ \
        b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
        b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
    } \
\
    ff_vertical_compose_haar##ext(b0, b1, width_align); \
} \
\
/* Writes interleaved output pairs b[2x], b[2x+1] without the final shift. */ \
static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w) \
{ \
    int w2 = w >> 1; \
    int x  = w2 - (w2 & ((align) - 1)); \
    ff_horizontal_compose_haar0i##ext(b, tmp, w); \
\
    for (; x < w2; x++) { \
        b[2*x    ] = tmp[x]; \
        b[2*x + 1] = COMPOSE_HAARiH0(b[x + w2], tmp[x]); \
    } \
} \
\
/* Same as the haar0i wrapper, but each output is rounded and shifted: (v + 1) >> 1. */ \
static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w) \
{ \
    int w2 = w >> 1; \
    int x  = w2 - (w2 & ((align) - 1)); \
    ff_horizontal_compose_haar1i##ext(b, tmp, w); \
\
    for (; x < w2; x++) { \
        b[2*x    ] = (tmp[x] + 1) >> 1; \
        b[2*x + 1] = (COMPOSE_HAARiH0(b[x + w2], tmp[x]) + 1) >> 1; \
    } \
}

#if HAVE_YASM
/* Instantiate the wrapper set once per instruction-set suffix; the second
 * argument is the element granularity the wrappers round down to.
 * The MMX set is only instantiated when not building for x86-64. */
#if !ARCH_X86_64
COMPOSE_VERTICAL(_mmx, 4)
#endif
COMPOSE_VERTICAL(_sse2, 8)


void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);

/**
 * Horizontal DD9/7 compose: call the SSSE3 routine with the full width, then
 * write the output pairs for the last (w/2 mod 8) positions in C.
 *
 * The C loop reads tmp[pos - 1] .. tmp[pos + 2], so tmp[] must be valid up to
 * two elements past the half-width.
 * NOTE(review): presumably the assembly routine fills and edge-extends tmp[];
 * confirm against the asm.
 */
static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
{
    const int half = w >> 1;
    int pos        = half - (half & 7);

    ff_horizontal_compose_dd97i_ssse3(b, tmp, w);

    while (pos < half) {
        b[2 * pos]     = (tmp[pos] + 1) >> 1;
        b[2 * pos + 1] = (COMPOSE_DD97iH0(tmp[pos - 1], tmp[pos], b[pos + half], tmp[pos + 1], tmp[pos + 2]) + 1) >> 1;
        pos++;
    }
}
#endif
yading@10 133
yading@10 134 void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
yading@10 135 {
yading@10 136 #if HAVE_YASM
yading@10 137 int mm_flags = av_get_cpu_flags();
yading@10 138
yading@10 139 #if !ARCH_X86_64
yading@10 140 if (!(mm_flags & AV_CPU_FLAG_MMX))
yading@10 141 return;
yading@10 142
yading@10 143 switch (type) {
yading@10 144 case DWT_DIRAC_DD9_7:
yading@10 145 d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
yading@10 146 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
yading@10 147 break;
yading@10 148 case DWT_DIRAC_LEGALL5_3:
yading@10 149 d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
yading@10 150 d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
yading@10 151 break;
yading@10 152 case DWT_DIRAC_DD13_7:
yading@10 153 d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
yading@10 154 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
yading@10 155 break;
yading@10 156 case DWT_DIRAC_HAAR0:
yading@10 157 d->vertical_compose = (void*)vertical_compose_haar_mmx;
yading@10 158 d->horizontal_compose = horizontal_compose_haar0i_mmx;
yading@10 159 break;
yading@10 160 case DWT_DIRAC_HAAR1:
yading@10 161 d->vertical_compose = (void*)vertical_compose_haar_mmx;
yading@10 162 d->horizontal_compose = horizontal_compose_haar1i_mmx;
yading@10 163 break;
yading@10 164 }
yading@10 165 #endif
yading@10 166
yading@10 167 if (!(mm_flags & AV_CPU_FLAG_SSE2))
yading@10 168 return;
yading@10 169
yading@10 170 switch (type) {
yading@10 171 case DWT_DIRAC_DD9_7:
yading@10 172 d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
yading@10 173 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
yading@10 174 break;
yading@10 175 case DWT_DIRAC_LEGALL5_3:
yading@10 176 d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
yading@10 177 d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
yading@10 178 break;
yading@10 179 case DWT_DIRAC_DD13_7:
yading@10 180 d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
yading@10 181 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
yading@10 182 break;
yading@10 183 case DWT_DIRAC_HAAR0:
yading@10 184 d->vertical_compose = (void*)vertical_compose_haar_sse2;
yading@10 185 d->horizontal_compose = horizontal_compose_haar0i_sse2;
yading@10 186 break;
yading@10 187 case DWT_DIRAC_HAAR1:
yading@10 188 d->vertical_compose = (void*)vertical_compose_haar_sse2;
yading@10 189 d->horizontal_compose = horizontal_compose_haar1i_sse2;
yading@10 190 break;
yading@10 191 }
yading@10 192
yading@10 193 if (!(mm_flags & AV_CPU_FLAG_SSSE3))
yading@10 194 return;
yading@10 195
yading@10 196 switch (type) {
yading@10 197 case DWT_DIRAC_DD9_7:
yading@10 198 d->horizontal_compose = horizontal_compose_dd97i_ssse3;
yading@10 199 break;
yading@10 200 }
yading@10 201 #endif // HAVE_YASM
yading@10 202 }