annotate ffmpeg/libavcodec/vp3dsp.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Copyright (C) 2004 the ffmpeg project
yading@10 3 *
yading@10 4 * This file is part of FFmpeg.
yading@10 5 *
yading@10 6 * FFmpeg is free software; you can redistribute it and/or
yading@10 7 * modify it under the terms of the GNU Lesser General Public
yading@10 8 * License as published by the Free Software Foundation; either
yading@10 9 * version 2.1 of the License, or (at your option) any later version.
yading@10 10 *
yading@10 11 * FFmpeg is distributed in the hope that it will be useful,
yading@10 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 14 * Lesser General Public License for more details.
yading@10 15 *
yading@10 16 * You should have received a copy of the GNU Lesser General Public
yading@10 17 * License along with FFmpeg; if not, write to the Free Software
yading@10 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 19 */
yading@10 20
yading@10 21 /**
yading@10 22 * @file
yading@10 23 * Standard C DSP-oriented functions cribbed from the original VP3
yading@10 24 * source code.
yading@10 25 */
yading@10 26
yading@10 27 #include "libavutil/attributes.h"
yading@10 28 #include "libavutil/common.h"
yading@10 29 #include "avcodec.h"
yading@10 30 #include "dsputil.h"
yading@10 31 #include "rnd_avg.h"
yading@10 32 #include "vp3dsp.h"
yading@10 33
/* Rounding term used on the DC-only path of idct(): it is added, scaled by
 * 1 << 16, before the combined >> 20. */
#define IdctAdjustBeforeShift 8

/* 16.16 fixed-point cosines: xCkS(8-k) == round(65536 * cos(k * pi / 16)). */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785

/*
 * Fixed-point multiply: (a * b) >> 16.
 *
 * The product is formed in unsigned arithmetic so that an out-of-range
 * intermediate wraps instead of triggering signed-overflow undefined
 * behaviour; it is converted back to int before the (arithmetic) shift so
 * negative products keep their sign. For every product that fits in an int
 * the result is the same as a plain signed multiply.
 */
#define M(a, b) ((int)((unsigned)(a) * (unsigned)(b)) >> 16)
yading@10 44
yading@10 45 static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
yading@10 46 {
yading@10 47 int16_t *ip = input;
yading@10 48
yading@10 49 int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
yading@10 50 int Ed, Gd, Add, Bdd, Fd, Hd;
yading@10 51
yading@10 52 int i;
yading@10 53
yading@10 54 /* Inverse DCT on the rows now */
yading@10 55 for (i = 0; i < 8; i++) {
yading@10 56 /* Check for non-zero values */
yading@10 57 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
yading@10 58 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
yading@10 59 A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
yading@10 60 B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
yading@10 61 C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
yading@10 62 D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
yading@10 63
yading@10 64 Ad = M(xC4S4, (A - C));
yading@10 65 Bd = M(xC4S4, (B - D));
yading@10 66
yading@10 67 Cd = A + C;
yading@10 68 Dd = B + D;
yading@10 69
yading@10 70 E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
yading@10 71 F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
yading@10 72
yading@10 73 G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
yading@10 74 H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
yading@10 75
yading@10 76 Ed = E - G;
yading@10 77 Gd = E + G;
yading@10 78
yading@10 79 Add = F + Ad;
yading@10 80 Bdd = Bd - H;
yading@10 81
yading@10 82 Fd = F - Ad;
yading@10 83 Hd = Bd + H;
yading@10 84
yading@10 85 /* Final sequence of operations over-write original inputs. */
yading@10 86 ip[0 * 8] = Gd + Cd ;
yading@10 87 ip[7 * 8] = Gd - Cd ;
yading@10 88
yading@10 89 ip[1 * 8] = Add + Hd;
yading@10 90 ip[2 * 8] = Add - Hd;
yading@10 91
yading@10 92 ip[3 * 8] = Ed + Dd ;
yading@10 93 ip[4 * 8] = Ed - Dd ;
yading@10 94
yading@10 95 ip[5 * 8] = Fd + Bdd;
yading@10 96 ip[6 * 8] = Fd - Bdd;
yading@10 97 }
yading@10 98
yading@10 99 ip += 1; /* next row */
yading@10 100 }
yading@10 101
yading@10 102 ip = input;
yading@10 103
yading@10 104 for ( i = 0; i < 8; i++) {
yading@10 105 /* Check for non-zero values (bitwise or faster than ||) */
yading@10 106 if ( ip[1] | ip[2] | ip[3] |
yading@10 107 ip[4] | ip[5] | ip[6] | ip[7] ) {
yading@10 108
yading@10 109 A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
yading@10 110 B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
yading@10 111 C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
yading@10 112 D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
yading@10 113
yading@10 114 Ad = M(xC4S4, (A - C));
yading@10 115 Bd = M(xC4S4, (B - D));
yading@10 116
yading@10 117 Cd = A + C;
yading@10 118 Dd = B + D;
yading@10 119
yading@10 120 E = M(xC4S4, (ip[0] + ip[4])) + 8;
yading@10 121 F = M(xC4S4, (ip[0] - ip[4])) + 8;
yading@10 122
yading@10 123 if(type==1){ //HACK
yading@10 124 E += 16*128;
yading@10 125 F += 16*128;
yading@10 126 }
yading@10 127
yading@10 128 G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
yading@10 129 H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
yading@10 130
yading@10 131 Ed = E - G;
yading@10 132 Gd = E + G;
yading@10 133
yading@10 134 Add = F + Ad;
yading@10 135 Bdd = Bd - H;
yading@10 136
yading@10 137 Fd = F - Ad;
yading@10 138 Hd = Bd + H;
yading@10 139
yading@10 140 /* Final sequence of operations over-write original inputs. */
yading@10 141 if (type == 1) {
yading@10 142 dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
yading@10 143 dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
yading@10 144
yading@10 145 dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
yading@10 146 dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
yading@10 147
yading@10 148 dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
yading@10 149 dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
yading@10 150
yading@10 151 dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
yading@10 152 dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
yading@10 153 }else{
yading@10 154 dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
yading@10 155 dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
yading@10 156
yading@10 157 dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
yading@10 158 dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
yading@10 159
yading@10 160 dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
yading@10 161 dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
yading@10 162
yading@10 163 dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
yading@10 164 dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
yading@10 165 }
yading@10 166
yading@10 167 } else {
yading@10 168 if (type == 1) {
yading@10 169 dst[0*stride]=
yading@10 170 dst[1*stride]=
yading@10 171 dst[2*stride]=
yading@10 172 dst[3*stride]=
yading@10 173 dst[4*stride]=
yading@10 174 dst[5*stride]=
yading@10 175 dst[6*stride]=
yading@10 176 dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20));
yading@10 177 }else{
yading@10 178 if(ip[0]){
yading@10 179 int v= ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20);
yading@10 180 dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
yading@10 181 dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
yading@10 182 dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
yading@10 183 dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
yading@10 184 dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
yading@10 185 dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
yading@10 186 dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
yading@10 187 dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
yading@10 188 }
yading@10 189 }
yading@10 190 }
yading@10 191
yading@10 192 ip += 8; /* next column */
yading@10 193 dst++;
yading@10 194 }
yading@10 195 }
yading@10 196
/**
 * Inverse-transform a coefficient block, store the result into dest and
 * clear the block afterwards.
 *
 * @param dest      destination pixels (align 8)
 * @param line_size distance in bytes between two lines of dest
 * @param block     64 coefficients (align 16); all zero on return
 */
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size,
                           int16_t *block/*align 16*/)
{
    /* type 1 selects the "put" behaviour of idct() */
    idct(dest, line_size, block, 1);

    /* leave the coefficient block zeroed */
    memset(block, 0, 64 * sizeof(block[0]));
}
yading@10 203
/**
 * Inverse-transform a coefficient block, add the result to the pixels already
 * in dest and clear the block afterwards.
 *
 * @param dest      destination pixels (align 8)
 * @param line_size distance in bytes between two lines of dest
 * @param block     64 coefficients (align 16); all zero on return
 */
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size,
                           int16_t *block/*align 16*/)
{
    /* any type other than 1 selects the "add" behaviour of idct() */
    idct(dest, line_size, block, 2);

    /* leave the coefficient block zeroed */
    memset(block, 0, 64 * sizeof(block[0]));
}
yading@10 210
/**
 * Add the contribution of a block that has only its first coefficient set to
 * an 8x8 pixel area, then clear that coefficient.
 *
 * @param dest      destination pixels (align 8)
 * @param line_size distance in bytes between two lines of dest
 * @param block     coefficient block (align 16); only block[0] is read and
 *                  it is reset to 0 on return
 */
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
                              int16_t *block/*align 16*/)
{
    const int dc = (block[0] + 15) >> 5;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
        dest += line_size;
    }

    block[0] = 0;
}
yading@10 229
/**
 * Loop filter applied to 8 horizontally adjacent positions, each filtered
 * along the vertical direction.
 *
 * For every position the pixel two lines above, the one directly above, the
 * pixel itself and the one below are combined into a filter value; the
 * pixel above and the pixel itself are then adjusted in opposite directions.
 *
 * @param first_pixel     first of the 8 pixels on the lower side of the edge
 * @param stride          distance in bytes between two lines
 * @param bounding_values lookup table indexed by the scaled filter value;
 *                        the index can be negative, so this presumably
 *                        points into the middle of the table (confirm with
 *                        the caller)
 */
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    const int above = -stride;
    unsigned char *const stop = first_pixel + 8;
    unsigned char *px;

    for (px = first_pixel; px < stop; px++) {
        const int delta = (px[2 * above] - px[stride]) +
                          3 * (px[0] - px[above]);
        const int adjust = bounding_values[(delta + 4) >> 3];

        px[above] = av_clip_uint8(px[above] + adjust);
        px[0]     = av_clip_uint8(px[0] - adjust);
    }
}
yading@10 246
/**
 * Loop filter applied to 8 vertically adjacent positions, each filtered
 * along the horizontal direction.
 *
 * For every line the pixels at offsets -2, -1, 0 and +1 are combined into a
 * filter value; the pixels at -1 and 0 are then adjusted in opposite
 * directions.
 *
 * @param first_pixel     pixel at offset 0 on the first line
 * @param stride          distance in bytes between two lines
 * @param bounding_values lookup table indexed by the scaled filter value;
 *                        the index can be negative, so this presumably
 *                        points into the middle of the table (confirm with
 *                        the caller)
 */
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    unsigned char *const stop = first_pixel + 8 * stride;

    while (first_pixel != stop) {
        const int delta = (first_pixel[-2] - first_pixel[1]) +
                          3 * (first_pixel[0] - first_pixel[-1]);
        const int adjust = bounding_values[(delta + 4) >> 3];

        first_pixel[-1] = av_clip_uint8(first_pixel[-1] + adjust);
        first_pixel[0]  = av_clip_uint8(first_pixel[0] - adjust);

        first_pixel += stride;
    }
}
yading@10 262
/**
 * Combine two 8-pixel-wide sources into dst with no_rnd_avg32(), four bytes
 * at a time.
 *
 * @param dst    destination; written with AV_WN32A, i.e. as aligned 32-bit
 *               stores
 * @param src1   first source, read with AV_RN32
 * @param src2   second source, read with AV_RN32
 * @param stride distance in bytes between two lines, shared by all three
 *               buffers
 * @param h      number of lines to process
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    int row, half;

    for (row = 0; row < h; row++) {
        const ptrdiff_t line = row * stride;

        /* each line is handled as two 4-byte words: bytes 0-3, then 4-7 */
        for (half = 0; half < 8; half += 4) {
            const uint32_t lhs = AV_RN32(&src1[line + half]);
            const uint32_t rhs = AV_RN32(&src2[line + half]);

            AV_WN32A(&dst[line + half], no_rnd_avg32(lhs, rhs));
        }
    }
}
yading@10 279
yading@10 280 av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
yading@10 281 {
yading@10 282 c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
yading@10 283
yading@10 284 c->idct_put = vp3_idct_put_c;
yading@10 285 c->idct_add = vp3_idct_add_c;
yading@10 286 c->idct_dc_add = vp3_idct_dc_add_c;
yading@10 287 c->v_loop_filter = vp3_v_loop_filter_c;
yading@10 288 c->h_loop_filter = vp3_h_loop_filter_c;
yading@10 289
yading@10 290 if (ARCH_ARM)
yading@10 291 ff_vp3dsp_init_arm(c, flags);
yading@10 292 if (ARCH_BFIN)
yading@10 293 ff_vp3dsp_init_bfin(c, flags);
yading@10 294 if (ARCH_PPC)
yading@10 295 ff_vp3dsp_init_ppc(c, flags);
yading@10 296 if (ARCH_X86)
yading@10 297 ff_vp3dsp_init_x86(c, flags);
yading@10 298 }