annotate ffmpeg/libavcodec/simple_idct_template.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Simple IDCT
yading@10 3 *
yading@10 4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
yading@10 5 *
yading@10 6 * This file is part of FFmpeg.
yading@10 7 *
yading@10 8 * FFmpeg is free software; you can redistribute it and/or
yading@10 9 * modify it under the terms of the GNU Lesser General Public
yading@10 10 * License as published by the Free Software Foundation; either
yading@10 11 * version 2.1 of the License, or (at your option) any later version.
yading@10 12 *
yading@10 13 * FFmpeg is distributed in the hope that it will be useful,
yading@10 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 16 * Lesser General Public License for more details.
yading@10 17 *
yading@10 18 * You should have received a copy of the GNU Lesser General Public
yading@10 19 * License along with FFmpeg; if not, write to the Free Software
yading@10 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 21 */
yading@10 22
yading@10 23 /**
yading@10 24 * @file
yading@10 25 * simpleidct in C.
yading@10 26 */
yading@10 27
yading@10 28 /*
yading@10 29 based upon some commented-out C code from mpeg2dec (idct_mmx.c
yading@10 30 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
yading@10 31 */
yading@10 32
yading@10 33 #include "bit_depth_template.c"
yading@10 34
yading@10 35 #undef W1
yading@10 36 #undef W2
yading@10 37 #undef W3
yading@10 38 #undef W4
yading@10 39 #undef W5
yading@10 40 #undef W6
yading@10 41 #undef W7
yading@10 42 #undef ROW_SHIFT
yading@10 43 #undef COL_SHIFT
yading@10 44 #undef DC_SHIFT
yading@10 45 #undef MUL
yading@10 46 #undef MAC
yading@10 47
yading@10 48 #if BIT_DEPTH == 8
yading@10 49
yading@10 50 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 51 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 52 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 53 #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 54 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 55 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 56 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
yading@10 57
yading@10 58 #define ROW_SHIFT 11
yading@10 59 #define COL_SHIFT 20
yading@10 60 #define DC_SHIFT 3
yading@10 61
yading@10 62 #define MUL(a, b) MUL16(a, b)
yading@10 63 #define MAC(a, b, c) MAC16(a, b, c)
yading@10 64
yading@10 65 #elif BIT_DEPTH == 10
yading@10 66
yading@10 67 #define W1 90901
yading@10 68 #define W2 85627
yading@10 69 #define W3 77062
yading@10 70 #define W4 65535
yading@10 71 #define W5 51491
yading@10 72 #define W6 35468
yading@10 73 #define W7 18081
yading@10 74
yading@10 75 #define ROW_SHIFT 15
yading@10 76 #define COL_SHIFT 20
yading@10 77 #define DC_SHIFT 1
yading@10 78
yading@10 79 #define MUL(a, b) ((a) * (b))
yading@10 80 #define MAC(a, b, c) ((a) += (b) * (c))
yading@10 81
yading@10 82 #else
yading@10 83
yading@10 84 #error "Unsupported bitdepth"
yading@10 85
yading@10 86 #endif
yading@10 87
yading@10 88 static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
yading@10 89 {
yading@10 90 int a0, a1, a2, a3, b0, b1, b2, b3;
yading@10 91
yading@10 92 #if HAVE_FAST_64BIT
yading@10 93 #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
yading@10 94 if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) {
yading@10 95 uint64_t temp;
yading@10 96 if (DC_SHIFT - extra_shift > 0) {
yading@10 97 temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
yading@10 98 } else {
yading@10 99 temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
yading@10 100 }
yading@10 101 temp += temp << 16;
yading@10 102 temp += temp << 32;
yading@10 103 ((uint64_t *)row)[0] = temp;
yading@10 104 ((uint64_t *)row)[1] = temp;
yading@10 105 return;
yading@10 106 }
yading@10 107 #else
yading@10 108 if (!(((uint32_t*)row)[1] |
yading@10 109 ((uint32_t*)row)[2] |
yading@10 110 ((uint32_t*)row)[3] |
yading@10 111 row[1])) {
yading@10 112 uint32_t temp;
yading@10 113 if (DC_SHIFT - extra_shift > 0) {
yading@10 114 temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
yading@10 115 } else {
yading@10 116 temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
yading@10 117 }
yading@10 118 temp += temp << 16;
yading@10 119 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
yading@10 120 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
yading@10 121 return;
yading@10 122 }
yading@10 123 #endif
yading@10 124
yading@10 125 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
yading@10 126 a1 = a0;
yading@10 127 a2 = a0;
yading@10 128 a3 = a0;
yading@10 129
yading@10 130 a0 += W2 * row[2];
yading@10 131 a1 += W6 * row[2];
yading@10 132 a2 -= W6 * row[2];
yading@10 133 a3 -= W2 * row[2];
yading@10 134
yading@10 135 b0 = MUL(W1, row[1]);
yading@10 136 MAC(b0, W3, row[3]);
yading@10 137 b1 = MUL(W3, row[1]);
yading@10 138 MAC(b1, -W7, row[3]);
yading@10 139 b2 = MUL(W5, row[1]);
yading@10 140 MAC(b2, -W1, row[3]);
yading@10 141 b3 = MUL(W7, row[1]);
yading@10 142 MAC(b3, -W5, row[3]);
yading@10 143
yading@10 144 if (AV_RN64A(row + 4)) {
yading@10 145 a0 += W4*row[4] + W6*row[6];
yading@10 146 a1 += - W4*row[4] - W2*row[6];
yading@10 147 a2 += - W4*row[4] + W2*row[6];
yading@10 148 a3 += W4*row[4] - W6*row[6];
yading@10 149
yading@10 150 MAC(b0, W5, row[5]);
yading@10 151 MAC(b0, W7, row[7]);
yading@10 152
yading@10 153 MAC(b1, -W1, row[5]);
yading@10 154 MAC(b1, -W5, row[7]);
yading@10 155
yading@10 156 MAC(b2, W7, row[5]);
yading@10 157 MAC(b2, W3, row[7]);
yading@10 158
yading@10 159 MAC(b3, W3, row[5]);
yading@10 160 MAC(b3, -W1, row[7]);
yading@10 161 }
yading@10 162
yading@10 163 row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift);
yading@10 164 row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift);
yading@10 165 row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift);
yading@10 166 row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift);
yading@10 167 row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift);
yading@10 168 row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift);
yading@10 169 row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift);
yading@10 170 row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift);
yading@10 171 }
yading@10 172
/*
 * Column-pass arithmetic shared by the idctSparseCol* functions.
 *
 * Expects `col` (pointer to the top element of a column inside an 8-wide
 * block, so successive elements are 8 entries apart) and the int variables
 * a0..a3 / b0..b3 to be in scope. On exit a0..a3 hold the even part
 * (including the rounding bias) and b0..b3 the odd part; the caller combines
 * them and shifts by COL_SHIFT. Contributions of col[8*4]..col[8*7] are
 * skipped when the respective coefficient is zero.
 */
#define IDCT_COLS do {                                              \
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));             \
        a1 = a0;                                                    \
        a2 = a0;                                                    \
        a3 = a0;                                                    \
                                                                    \
        a0 += W2 * col[8*2];                                        \
        a1 += W6 * col[8*2];                                        \
        a2 -= W6 * col[8*2];                                        \
        a3 -= W2 * col[8*2];                                        \
                                                                    \
        b0 = MUL(W1, col[8*1]);                                     \
        MAC(b0,  W3, col[8*3]);                                     \
        b1 = MUL(W3, col[8*1]);                                     \
        MAC(b1, -W7, col[8*3]);                                     \
        b2 = MUL(W5, col[8*1]);                                     \
        MAC(b2, -W1, col[8*3]);                                     \
        b3 = MUL(W7, col[8*1]);                                     \
        MAC(b3, -W5, col[8*3]);                                     \
                                                                    \
        if (col[8*4] != 0) {                                        \
            a0 += W4 * col[8*4];                                    \
            a1 -= W4 * col[8*4];                                    \
            a2 -= W4 * col[8*4];                                    \
            a3 += W4 * col[8*4];                                    \
        }                                                           \
                                                                    \
        if (col[8*5] != 0) {                                        \
            MAC(b0,  W5, col[8*5]);                                 \
            MAC(b1, -W1, col[8*5]);                                 \
            MAC(b2,  W7, col[8*5]);                                 \
            MAC(b3,  W3, col[8*5]);                                 \
        }                                                           \
                                                                    \
        if (col[8*6] != 0) {                                        \
            a0 += W6 * col[8*6];                                    \
            a1 -= W2 * col[8*6];                                    \
            a2 += W2 * col[8*6];                                    \
            a3 -= W6 * col[8*6];                                    \
        }                                                           \
                                                                    \
        if (col[8*7] != 0) {                                        \
            MAC(b0,  W7, col[8*7]);                                 \
            MAC(b1, -W5, col[8*7]);                                 \
            MAC(b2,  W3, col[8*7]);                                 \
            MAC(b3, -W1, col[8*7]);                                 \
        }                                                           \
    } while (0)
yading@10 222
yading@10 223 static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
yading@10 224 int16_t *col)
yading@10 225 {
yading@10 226 int a0, a1, a2, a3, b0, b1, b2, b3;
yading@10 227
yading@10 228 IDCT_COLS;
yading@10 229
yading@10 230 dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
yading@10 231 dest += line_size;
yading@10 232 dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
yading@10 233 dest += line_size;
yading@10 234 dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
yading@10 235 dest += line_size;
yading@10 236 dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
yading@10 237 dest += line_size;
yading@10 238 dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
yading@10 239 dest += line_size;
yading@10 240 dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
yading@10 241 dest += line_size;
yading@10 242 dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
yading@10 243 dest += line_size;
yading@10 244 dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
yading@10 245 }
yading@10 246
yading@10 247 static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
yading@10 248 int16_t *col)
yading@10 249 {
yading@10 250 int a0, a1, a2, a3, b0, b1, b2, b3;
yading@10 251
yading@10 252 IDCT_COLS;
yading@10 253
yading@10 254 dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
yading@10 255 dest += line_size;
yading@10 256 dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
yading@10 257 dest += line_size;
yading@10 258 dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
yading@10 259 dest += line_size;
yading@10 260 dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
yading@10 261 dest += line_size;
yading@10 262 dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
yading@10 263 dest += line_size;
yading@10 264 dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
yading@10 265 dest += line_size;
yading@10 266 dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
yading@10 267 dest += line_size;
yading@10 268 dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
yading@10 269 }
yading@10 270
yading@10 271 static inline void FUNC(idctSparseCol)(int16_t *col)
yading@10 272 {
yading@10 273 int a0, a1, a2, a3, b0, b1, b2, b3;
yading@10 274
yading@10 275 IDCT_COLS;
yading@10 276
yading@10 277 col[0 ] = ((a0 + b0) >> COL_SHIFT);
yading@10 278 col[8 ] = ((a1 + b1) >> COL_SHIFT);
yading@10 279 col[16] = ((a2 + b2) >> COL_SHIFT);
yading@10 280 col[24] = ((a3 + b3) >> COL_SHIFT);
yading@10 281 col[32] = ((a3 - b3) >> COL_SHIFT);
yading@10 282 col[40] = ((a2 - b2) >> COL_SHIFT);
yading@10 283 col[48] = ((a1 - b1) >> COL_SHIFT);
yading@10 284 col[56] = ((a0 - b0) >> COL_SHIFT);
yading@10 285 }
yading@10 286
yading@10 287 void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
yading@10 288 {
yading@10 289 pixel *dest = (pixel *)dest_;
yading@10 290 int i;
yading@10 291
yading@10 292 line_size /= sizeof(pixel);
yading@10 293
yading@10 294 for (i = 0; i < 8; i++)
yading@10 295 FUNC(idctRowCondDC)(block + i*8, 0);
yading@10 296
yading@10 297 for (i = 0; i < 8; i++)
yading@10 298 FUNC(idctSparseColPut)(dest + i, line_size, block + i);
yading@10 299 }
yading@10 300
yading@10 301 void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block)
yading@10 302 {
yading@10 303 pixel *dest = (pixel *)dest_;
yading@10 304 int i;
yading@10 305
yading@10 306 line_size /= sizeof(pixel);
yading@10 307
yading@10 308 for (i = 0; i < 8; i++)
yading@10 309 FUNC(idctRowCondDC)(block + i*8, 0);
yading@10 310
yading@10 311 for (i = 0; i < 8; i++)
yading@10 312 FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
yading@10 313 }
yading@10 314
yading@10 315 void FUNC(ff_simple_idct)(int16_t *block)
yading@10 316 {
yading@10 317 int i;
yading@10 318
yading@10 319 for (i = 0; i < 8; i++)
yading@10 320 FUNC(idctRowCondDC)(block + i*8, 0);
yading@10 321
yading@10 322 for (i = 0; i < 8; i++)
yading@10 323 FUNC(idctSparseCol)(block + i);
yading@10 324 }