/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * simpleidct in C.
 */

/*
 * Based upon some commented-out C code from mpeg2dec (idct_mmx.c,
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>).
 */

yading@10
|
33 #include "bit_depth_template.c"
|
yading@10
|
34
|
yading@10
|
35 #undef W1
|
yading@10
|
36 #undef W2
|
yading@10
|
37 #undef W3
|
yading@10
|
38 #undef W4
|
yading@10
|
39 #undef W5
|
yading@10
|
40 #undef W6
|
yading@10
|
41 #undef W7
|
yading@10
|
42 #undef ROW_SHIFT
|
yading@10
|
43 #undef COL_SHIFT
|
yading@10
|
44 #undef DC_SHIFT
|
yading@10
|
45 #undef MUL
|
yading@10
|
46 #undef MAC
|
yading@10
|
47
|
yading@10
|
48 #if BIT_DEPTH == 8
|
yading@10
|
49
|
yading@10
|
50 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
51 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
52 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
53 #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
54 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
55 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
56 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
|
yading@10
|
57
|
yading@10
|
58 #define ROW_SHIFT 11
|
yading@10
|
59 #define COL_SHIFT 20
|
yading@10
|
60 #define DC_SHIFT 3
|
yading@10
|
61
|
yading@10
|
62 #define MUL(a, b) MUL16(a, b)
|
yading@10
|
63 #define MAC(a, b, c) MAC16(a, b, c)
|
yading@10
|
64
|
yading@10
|
65 #elif BIT_DEPTH == 10
|
yading@10
|
66
|
yading@10
|
67 #define W1 90901
|
yading@10
|
68 #define W2 85627
|
yading@10
|
69 #define W3 77062
|
yading@10
|
70 #define W4 65535
|
yading@10
|
71 #define W5 51491
|
yading@10
|
72 #define W6 35468
|
yading@10
|
73 #define W7 18081
|
yading@10
|
74
|
yading@10
|
75 #define ROW_SHIFT 15
|
yading@10
|
76 #define COL_SHIFT 20
|
yading@10
|
77 #define DC_SHIFT 1
|
yading@10
|
78
|
yading@10
|
79 #define MUL(a, b) ((a) * (b))
|
yading@10
|
80 #define MAC(a, b, c) ((a) += (b) * (c))
|
yading@10
|
81
|
yading@10
|
82 #else
|
yading@10
|
83
|
yading@10
|
84 #error "Unsupported bitdepth"
|
yading@10
|
85
|
yading@10
|
86 #endif
|
yading@10
|
87
|
yading@10
|
88 static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
|
yading@10
|
89 {
|
yading@10
|
90 int a0, a1, a2, a3, b0, b1, b2, b3;
|
yading@10
|
91
|
yading@10
|
92 #if HAVE_FAST_64BIT
|
yading@10
|
93 #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
|
yading@10
|
94 if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) {
|
yading@10
|
95 uint64_t temp;
|
yading@10
|
96 if (DC_SHIFT - extra_shift > 0) {
|
yading@10
|
97 temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
|
yading@10
|
98 } else {
|
yading@10
|
99 temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
|
yading@10
|
100 }
|
yading@10
|
101 temp += temp << 16;
|
yading@10
|
102 temp += temp << 32;
|
yading@10
|
103 ((uint64_t *)row)[0] = temp;
|
yading@10
|
104 ((uint64_t *)row)[1] = temp;
|
yading@10
|
105 return;
|
yading@10
|
106 }
|
yading@10
|
107 #else
|
yading@10
|
108 if (!(((uint32_t*)row)[1] |
|
yading@10
|
109 ((uint32_t*)row)[2] |
|
yading@10
|
110 ((uint32_t*)row)[3] |
|
yading@10
|
111 row[1])) {
|
yading@10
|
112 uint32_t temp;
|
yading@10
|
113 if (DC_SHIFT - extra_shift > 0) {
|
yading@10
|
114 temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
|
yading@10
|
115 } else {
|
yading@10
|
116 temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
|
yading@10
|
117 }
|
yading@10
|
118 temp += temp << 16;
|
yading@10
|
119 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
|
yading@10
|
120 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
|
yading@10
|
121 return;
|
yading@10
|
122 }
|
yading@10
|
123 #endif
|
yading@10
|
124
|
yading@10
|
125 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
|
yading@10
|
126 a1 = a0;
|
yading@10
|
127 a2 = a0;
|
yading@10
|
128 a3 = a0;
|
yading@10
|
129
|
yading@10
|
130 a0 += W2 * row[2];
|
yading@10
|
131 a1 += W6 * row[2];
|
yading@10
|
132 a2 -= W6 * row[2];
|
yading@10
|
133 a3 -= W2 * row[2];
|
yading@10
|
134
|
yading@10
|
135 b0 = MUL(W1, row[1]);
|
yading@10
|
136 MAC(b0, W3, row[3]);
|
yading@10
|
137 b1 = MUL(W3, row[1]);
|
yading@10
|
138 MAC(b1, -W7, row[3]);
|
yading@10
|
139 b2 = MUL(W5, row[1]);
|
yading@10
|
140 MAC(b2, -W1, row[3]);
|
yading@10
|
141 b3 = MUL(W7, row[1]);
|
yading@10
|
142 MAC(b3, -W5, row[3]);
|
yading@10
|
143
|
yading@10
|
144 if (AV_RN64A(row + 4)) {
|
yading@10
|
145 a0 += W4*row[4] + W6*row[6];
|
yading@10
|
146 a1 += - W4*row[4] - W2*row[6];
|
yading@10
|
147 a2 += - W4*row[4] + W2*row[6];
|
yading@10
|
148 a3 += W4*row[4] - W6*row[6];
|
yading@10
|
149
|
yading@10
|
150 MAC(b0, W5, row[5]);
|
yading@10
|
151 MAC(b0, W7, row[7]);
|
yading@10
|
152
|
yading@10
|
153 MAC(b1, -W1, row[5]);
|
yading@10
|
154 MAC(b1, -W5, row[7]);
|
yading@10
|
155
|
yading@10
|
156 MAC(b2, W7, row[5]);
|
yading@10
|
157 MAC(b2, W3, row[7]);
|
yading@10
|
158
|
yading@10
|
159 MAC(b3, W3, row[5]);
|
yading@10
|
160 MAC(b3, -W1, row[7]);
|
yading@10
|
161 }
|
yading@10
|
162
|
yading@10
|
163 row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
164 row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
165 row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
166 row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
167 row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
168 row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
169 row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
170 row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift);
|
yading@10
|
171 }
|
yading@10
|
172
|
/*
 * Column-pass arithmetic shared by the idctSparseCol* functions.
 *
 * The expansion site must provide `col` (coefficients of one column, read
 * at a stride of 8) and the int lvalues a0..a3 and b0..b3, which receive
 * the even-part and odd-part sums. The rounding bias for COL_SHIFT is
 * folded into the col[8*0] term. Terms for col[8*4]..col[8*7] are added
 * only when the corresponding coefficient is non-zero. No shift or store
 * is done here; that is left to the caller.
 */
#define IDCT_COLS do {                                          \
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));         \
        a1 = a0;                                                \
        a2 = a0;                                                \
        a3 = a0;                                                \
                                                                \
        a0 +=  W2*col[8*2];                                     \
        a1 +=  W6*col[8*2];                                     \
        a2 += -W6*col[8*2];                                     \
        a3 += -W2*col[8*2];                                     \
                                                                \
        if (col[8*4]) {                                         \
            a0 +=  W4*col[8*4];                                 \
            a1 += -W4*col[8*4];                                 \
            a2 += -W4*col[8*4];                                 \
            a3 +=  W4*col[8*4];                                 \
        }                                                       \
                                                                \
        if (col[8*6]) {                                         \
            a0 +=  W6*col[8*6];                                 \
            a1 += -W2*col[8*6];                                 \
            a2 +=  W2*col[8*6];                                 \
            a3 += -W6*col[8*6];                                 \
        }                                                       \
                                                                \
        b0 = MUL(W1, col[8*1]);                                 \
        b1 = MUL(W3, col[8*1]);                                 \
        b2 = MUL(W5, col[8*1]);                                 \
        b3 = MUL(W7, col[8*1]);                                 \
                                                                \
        MAC(b0,  W3, col[8*3]);                                 \
        MAC(b1, -W7, col[8*3]);                                 \
        MAC(b2, -W1, col[8*3]);                                 \
        MAC(b3, -W5, col[8*3]);                                 \
                                                                \
        if (col[8*5]) {                                         \
            MAC(b0,  W5, col[8*5]);                             \
            MAC(b1, -W1, col[8*5]);                             \
            MAC(b2,  W7, col[8*5]);                             \
            MAC(b3,  W3, col[8*5]);                             \
        }                                                       \
                                                                \
        if (col[8*7]) {                                         \
            MAC(b0,  W7, col[8*7]);                             \
            MAC(b1, -W5, col[8*7]);                             \
            MAC(b2,  W3, col[8*7]);                             \
            MAC(b3, -W1, col[8*7]);                             \
        }                                                       \
    } while (0)

yading@10
|
223 static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
|
yading@10
|
224 int16_t *col)
|
yading@10
|
225 {
|
yading@10
|
226 int a0, a1, a2, a3, b0, b1, b2, b3;
|
yading@10
|
227
|
yading@10
|
228 IDCT_COLS;
|
yading@10
|
229
|
yading@10
|
230 dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
|
yading@10
|
231 dest += line_size;
|
yading@10
|
232 dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
|
yading@10
|
233 dest += line_size;
|
yading@10
|
234 dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
|
yading@10
|
235 dest += line_size;
|
yading@10
|
236 dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
|
yading@10
|
237 dest += line_size;
|
yading@10
|
238 dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
|
yading@10
|
239 dest += line_size;
|
yading@10
|
240 dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
|
yading@10
|
241 dest += line_size;
|
yading@10
|
242 dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
|
yading@10
|
243 dest += line_size;
|
yading@10
|
244 dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
|
yading@10
|
245 }
|
yading@10
|
246
|
yading@10
|
247 static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
|
yading@10
|
248 int16_t *col)
|
yading@10
|
249 {
|
yading@10
|
250 int a0, a1, a2, a3, b0, b1, b2, b3;
|
yading@10
|
251
|
yading@10
|
252 IDCT_COLS;
|
yading@10
|
253
|
yading@10
|
254 dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
|
yading@10
|
255 dest += line_size;
|
yading@10
|
256 dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
|
yading@10
|
257 dest += line_size;
|
yading@10
|
258 dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
|
yading@10
|
259 dest += line_size;
|
yading@10
|
260 dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
|
yading@10
|
261 dest += line_size;
|
yading@10
|
262 dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
|
yading@10
|
263 dest += line_size;
|
yading@10
|
264 dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
|
yading@10
|
265 dest += line_size;
|
yading@10
|
266 dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
|
yading@10
|
267 dest += line_size;
|
yading@10
|
268 dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
|
yading@10
|
269 }
|
yading@10
|
270
|
yading@10
|
271 static inline void FUNC(idctSparseCol)(int16_t *col)
|
yading@10
|
272 {
|
yading@10
|
273 int a0, a1, a2, a3, b0, b1, b2, b3;
|
yading@10
|
274
|
yading@10
|
275 IDCT_COLS;
|
yading@10
|
276
|
yading@10
|
277 col[0 ] = ((a0 + b0) >> COL_SHIFT);
|
yading@10
|
278 col[8 ] = ((a1 + b1) >> COL_SHIFT);
|
yading@10
|
279 col[16] = ((a2 + b2) >> COL_SHIFT);
|
yading@10
|
280 col[24] = ((a3 + b3) >> COL_SHIFT);
|
yading@10
|
281 col[32] = ((a3 - b3) >> COL_SHIFT);
|
yading@10
|
282 col[40] = ((a2 - b2) >> COL_SHIFT);
|
yading@10
|
283 col[48] = ((a1 - b1) >> COL_SHIFT);
|
yading@10
|
284 col[56] = ((a0 - b0) >> COL_SHIFT);
|
yading@10
|
285 }
|
yading@10
|
286
|
yading@10
|
287 void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
|
yading@10
|
288 {
|
yading@10
|
289 pixel *dest = (pixel *)dest_;
|
yading@10
|
290 int i;
|
yading@10
|
291
|
yading@10
|
292 line_size /= sizeof(pixel);
|
yading@10
|
293
|
yading@10
|
294 for (i = 0; i < 8; i++)
|
yading@10
|
295 FUNC(idctRowCondDC)(block + i*8, 0);
|
yading@10
|
296
|
yading@10
|
297 for (i = 0; i < 8; i++)
|
yading@10
|
298 FUNC(idctSparseColPut)(dest + i, line_size, block + i);
|
yading@10
|
299 }
|
yading@10
|
300
|
yading@10
|
301 void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block)
|
yading@10
|
302 {
|
yading@10
|
303 pixel *dest = (pixel *)dest_;
|
yading@10
|
304 int i;
|
yading@10
|
305
|
yading@10
|
306 line_size /= sizeof(pixel);
|
yading@10
|
307
|
yading@10
|
308 for (i = 0; i < 8; i++)
|
yading@10
|
309 FUNC(idctRowCondDC)(block + i*8, 0);
|
yading@10
|
310
|
yading@10
|
311 for (i = 0; i < 8; i++)
|
yading@10
|
312 FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
|
yading@10
|
313 }
|
yading@10
|
314
|
yading@10
|
315 void FUNC(ff_simple_idct)(int16_t *block)
|
yading@10
|
316 {
|
yading@10
|
317 int i;
|
yading@10
|
318
|
yading@10
|
319 for (i = 0; i < 8; i++)
|
yading@10
|
320 FUNC(idctRowCondDC)(block + i*8, 0);
|
yading@10
|
321
|
yading@10
|
322 for (i = 0; i < 8; i++)
|
yading@10
|
323 FUNC(idctSparseCol)(block + i);
|
yading@10
|
324 }
|