yading@10
|
1 /*
|
yading@10
|
2 * H.264 IDCT
|
yading@10
|
3 * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
|
yading@10
|
4 *
|
yading@10
|
5 * This file is part of FFmpeg.
|
yading@10
|
6 *
|
yading@10
|
7 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
8 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
9 * License as published by the Free Software Foundation; either
|
yading@10
|
10 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
11 *
|
yading@10
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
15 * Lesser General Public License for more details.
|
yading@10
|
16 *
|
yading@10
|
17 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
18 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
20 */
|
yading@10
|
21
|
yading@10
|
22 /**
|
yading@10
|
23 * @file
|
yading@10
|
24 * H.264 IDCT.
|
yading@10
|
25 * @author Michael Niedermayer <michaelni@gmx.at>
|
yading@10
|
26 */
|
yading@10
|
27
|
yading@10
|
28 #include "bit_depth_template.c"
|
yading@10
|
29 #include "libavutil/common.h"
|
yading@10
|
30 #include "h264.h"
|
yading@10
|
31
|
yading@10
|
32 void FUNCC(ff_h264_idct_add)(uint8_t *_dst, int16_t *_block, int stride)
|
yading@10
|
33 {
|
yading@10
|
34 int i;
|
yading@10
|
35 pixel *dst = (pixel*)_dst;
|
yading@10
|
36 dctcoef *block = (dctcoef*)_block;
|
yading@10
|
37 stride >>= sizeof(pixel)-1;
|
yading@10
|
38
|
yading@10
|
39 block[0] += 1 << 5;
|
yading@10
|
40
|
yading@10
|
41 for(i=0; i<4; i++){
|
yading@10
|
42 const int z0= block[i + 4*0] + block[i + 4*2];
|
yading@10
|
43 const int z1= block[i + 4*0] - block[i + 4*2];
|
yading@10
|
44 const int z2= (block[i + 4*1]>>1) - block[i + 4*3];
|
yading@10
|
45 const int z3= block[i + 4*1] + (block[i + 4*3]>>1);
|
yading@10
|
46
|
yading@10
|
47 block[i + 4*0]= z0 + z3;
|
yading@10
|
48 block[i + 4*1]= z1 + z2;
|
yading@10
|
49 block[i + 4*2]= z1 - z2;
|
yading@10
|
50 block[i + 4*3]= z0 - z3;
|
yading@10
|
51 }
|
yading@10
|
52
|
yading@10
|
53 for(i=0; i<4; i++){
|
yading@10
|
54 const int z0= block[0 + 4*i] + block[2 + 4*i];
|
yading@10
|
55 const int z1= block[0 + 4*i] - block[2 + 4*i];
|
yading@10
|
56 const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i];
|
yading@10
|
57 const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1);
|
yading@10
|
58
|
yading@10
|
59 dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6));
|
yading@10
|
60 dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6));
|
yading@10
|
61 dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6));
|
yading@10
|
62 dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6));
|
yading@10
|
63 }
|
yading@10
|
64
|
yading@10
|
65 memset(block, 0, 16 * sizeof(dctcoef));
|
yading@10
|
66 }
|
yading@10
|
67
|
yading@10
|
68 void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){
|
yading@10
|
69 int i;
|
yading@10
|
70 pixel *dst = (pixel*)_dst;
|
yading@10
|
71 dctcoef *block = (dctcoef*)_block;
|
yading@10
|
72 stride >>= sizeof(pixel)-1;
|
yading@10
|
73
|
yading@10
|
74 block[0] += 32;
|
yading@10
|
75
|
yading@10
|
76 for( i = 0; i < 8; i++ )
|
yading@10
|
77 {
|
yading@10
|
78 const int a0 = block[i+0*8] + block[i+4*8];
|
yading@10
|
79 const int a2 = block[i+0*8] - block[i+4*8];
|
yading@10
|
80 const int a4 = (block[i+2*8]>>1) - block[i+6*8];
|
yading@10
|
81 const int a6 = (block[i+6*8]>>1) + block[i+2*8];
|
yading@10
|
82
|
yading@10
|
83 const int b0 = a0 + a6;
|
yading@10
|
84 const int b2 = a2 + a4;
|
yading@10
|
85 const int b4 = a2 - a4;
|
yading@10
|
86 const int b6 = a0 - a6;
|
yading@10
|
87
|
yading@10
|
88 const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
|
yading@10
|
89 const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
|
yading@10
|
90 const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
|
yading@10
|
91 const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
|
yading@10
|
92
|
yading@10
|
93 const int b1 = (a7>>2) + a1;
|
yading@10
|
94 const int b3 = a3 + (a5>>2);
|
yading@10
|
95 const int b5 = (a3>>2) - a5;
|
yading@10
|
96 const int b7 = a7 - (a1>>2);
|
yading@10
|
97
|
yading@10
|
98 block[i+0*8] = b0 + b7;
|
yading@10
|
99 block[i+7*8] = b0 - b7;
|
yading@10
|
100 block[i+1*8] = b2 + b5;
|
yading@10
|
101 block[i+6*8] = b2 - b5;
|
yading@10
|
102 block[i+2*8] = b4 + b3;
|
yading@10
|
103 block[i+5*8] = b4 - b3;
|
yading@10
|
104 block[i+3*8] = b6 + b1;
|
yading@10
|
105 block[i+4*8] = b6 - b1;
|
yading@10
|
106 }
|
yading@10
|
107 for( i = 0; i < 8; i++ )
|
yading@10
|
108 {
|
yading@10
|
109 const int a0 = block[0+i*8] + block[4+i*8];
|
yading@10
|
110 const int a2 = block[0+i*8] - block[4+i*8];
|
yading@10
|
111 const int a4 = (block[2+i*8]>>1) - block[6+i*8];
|
yading@10
|
112 const int a6 = (block[6+i*8]>>1) + block[2+i*8];
|
yading@10
|
113
|
yading@10
|
114 const int b0 = a0 + a6;
|
yading@10
|
115 const int b2 = a2 + a4;
|
yading@10
|
116 const int b4 = a2 - a4;
|
yading@10
|
117 const int b6 = a0 - a6;
|
yading@10
|
118
|
yading@10
|
119 const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
|
yading@10
|
120 const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
|
yading@10
|
121 const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
|
yading@10
|
122 const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
|
yading@10
|
123
|
yading@10
|
124 const int b1 = (a7>>2) + a1;
|
yading@10
|
125 const int b3 = a3 + (a5>>2);
|
yading@10
|
126 const int b5 = (a3>>2) - a5;
|
yading@10
|
127 const int b7 = a7 - (a1>>2);
|
yading@10
|
128
|
yading@10
|
129 dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) );
|
yading@10
|
130 dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) );
|
yading@10
|
131 dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) );
|
yading@10
|
132 dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) );
|
yading@10
|
133 dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) );
|
yading@10
|
134 dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) );
|
yading@10
|
135 dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) );
|
yading@10
|
136 dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) );
|
yading@10
|
137 }
|
yading@10
|
138
|
yading@10
|
139 memset(block, 0, 64 * sizeof(dctcoef));
|
yading@10
|
140 }
|
yading@10
|
141
|
yading@10
|
142 // assumes all AC coefs are 0
|
yading@10
|
143 void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
|
yading@10
|
144 int i, j;
|
yading@10
|
145 pixel *dst = (pixel*)_dst;
|
yading@10
|
146 dctcoef *block = (dctcoef*)_block;
|
yading@10
|
147 int dc = (block[0] + 32) >> 6;
|
yading@10
|
148 stride /= sizeof(pixel);
|
yading@10
|
149 block[0] = 0;
|
yading@10
|
150 for( j = 0; j < 4; j++ )
|
yading@10
|
151 {
|
yading@10
|
152 for( i = 0; i < 4; i++ )
|
yading@10
|
153 dst[i] = av_clip_pixel( dst[i] + dc );
|
yading@10
|
154 dst += stride;
|
yading@10
|
155 }
|
yading@10
|
156 }
|
yading@10
|
157
|
yading@10
|
158 void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
|
yading@10
|
159 int i, j;
|
yading@10
|
160 pixel *dst = (pixel*)_dst;
|
yading@10
|
161 dctcoef *block = (dctcoef*)_block;
|
yading@10
|
162 int dc = (block[0] + 32) >> 6;
|
yading@10
|
163 block[0] = 0;
|
yading@10
|
164 stride /= sizeof(pixel);
|
yading@10
|
165 for( j = 0; j < 8; j++ )
|
yading@10
|
166 {
|
yading@10
|
167 for( i = 0; i < 8; i++ )
|
yading@10
|
168 dst[i] = av_clip_pixel( dst[i] + dc );
|
yading@10
|
169 dst += stride;
|
yading@10
|
170 }
|
yading@10
|
171 }
|
yading@10
|
172
|
yading@10
|
173 void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
|
yading@10
|
174 int i;
|
yading@10
|
175 for(i=0; i<16; i++){
|
yading@10
|
176 int nnz = nnzc[ scan8[i] ];
|
yading@10
|
177 if(nnz){
|
yading@10
|
178 if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
179 else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
180 }
|
yading@10
|
181 }
|
yading@10
|
182 }
|
yading@10
|
183
|
yading@10
|
184 void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
|
yading@10
|
185 int i;
|
yading@10
|
186 for(i=0; i<16; i++){
|
yading@10
|
187 if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
188 else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
189 }
|
yading@10
|
190 }
|
yading@10
|
191
|
yading@10
|
192 void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
|
yading@10
|
193 int i;
|
yading@10
|
194 for(i=0; i<16; i+=4){
|
yading@10
|
195 int nnz = nnzc[ scan8[i] ];
|
yading@10
|
196 if(nnz){
|
yading@10
|
197 if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
198 else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
199 }
|
yading@10
|
200 }
|
yading@10
|
201 }
|
yading@10
|
202
|
yading@10
|
203 void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
|
yading@10
|
204 int i, j;
|
yading@10
|
205 for(j=1; j<3; j++){
|
yading@10
|
206 for(i=j*16; i<j*16+4; i++){
|
yading@10
|
207 if(nnzc[ scan8[i] ])
|
yading@10
|
208 FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
209 else if(((dctcoef*)block)[i*16])
|
yading@10
|
210 FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
211 }
|
yading@10
|
212 }
|
yading@10
|
213 }
|
yading@10
|
214
|
yading@10
|
215 void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
|
yading@10
|
216 int i, j;
|
yading@10
|
217
|
yading@10
|
218 for(j=1; j<3; j++){
|
yading@10
|
219 for(i=j*16; i<j*16+4; i++){
|
yading@10
|
220 if(nnzc[ scan8[i] ])
|
yading@10
|
221 FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
222 else if(((dctcoef*)block)[i*16])
|
yading@10
|
223 FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
|
yading@10
|
224 }
|
yading@10
|
225 }
|
yading@10
|
226
|
yading@10
|
227 for(j=1; j<3; j++){
|
yading@10
|
228 for(i=j*16+4; i<j*16+8; i++){
|
yading@10
|
229 if(nnzc[ scan8[i+4] ])
|
yading@10
|
230 FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
|
yading@10
|
231 else if(((dctcoef*)block)[i*16])
|
yading@10
|
232 FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
|
yading@10
|
233 }
|
yading@10
|
234 }
|
yading@10
|
235 }
|
yading@10
|
236
|
yading@10
|
237 /**
|
yading@10
|
238 * IDCT transforms the 16 dc values and dequantizes them.
|
yading@10
|
239 * @param qmul quantization parameter
|
yading@10
|
240 */
|
yading@10
|
241 void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul){
|
yading@10
|
242 #define stride 16
|
yading@10
|
243 int i;
|
yading@10
|
244 int temp[16];
|
yading@10
|
245 static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
|
yading@10
|
246 dctcoef *input = (dctcoef*)_input;
|
yading@10
|
247 dctcoef *output = (dctcoef*)_output;
|
yading@10
|
248
|
yading@10
|
249 for(i=0; i<4; i++){
|
yading@10
|
250 const int z0= input[4*i+0] + input[4*i+1];
|
yading@10
|
251 const int z1= input[4*i+0] - input[4*i+1];
|
yading@10
|
252 const int z2= input[4*i+2] - input[4*i+3];
|
yading@10
|
253 const int z3= input[4*i+2] + input[4*i+3];
|
yading@10
|
254
|
yading@10
|
255 temp[4*i+0]= z0+z3;
|
yading@10
|
256 temp[4*i+1]= z0-z3;
|
yading@10
|
257 temp[4*i+2]= z1-z2;
|
yading@10
|
258 temp[4*i+3]= z1+z2;
|
yading@10
|
259 }
|
yading@10
|
260
|
yading@10
|
261 for(i=0; i<4; i++){
|
yading@10
|
262 const int offset= x_offset[i];
|
yading@10
|
263 const int z0= temp[4*0+i] + temp[4*2+i];
|
yading@10
|
264 const int z1= temp[4*0+i] - temp[4*2+i];
|
yading@10
|
265 const int z2= temp[4*1+i] - temp[4*3+i];
|
yading@10
|
266 const int z3= temp[4*1+i] + temp[4*3+i];
|
yading@10
|
267
|
yading@10
|
268 output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8));
|
yading@10
|
269 output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
|
yading@10
|
270 output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
|
yading@10
|
271 output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
|
yading@10
|
272 }
|
yading@10
|
273 #undef stride
|
yading@10
|
274 }
|
yading@10
|
275
|
yading@10
|
276 void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
|
yading@10
|
277 const int stride= 16*2;
|
yading@10
|
278 const int xStride= 16;
|
yading@10
|
279 int i;
|
yading@10
|
280 int temp[8];
|
yading@10
|
281 static const uint8_t x_offset[2]={0, 16};
|
yading@10
|
282 dctcoef *block = (dctcoef*)_block;
|
yading@10
|
283
|
yading@10
|
284 for(i=0; i<4; i++){
|
yading@10
|
285 temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
|
yading@10
|
286 temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
|
yading@10
|
287 }
|
yading@10
|
288
|
yading@10
|
289 for(i=0; i<2; i++){
|
yading@10
|
290 const int offset= x_offset[i];
|
yading@10
|
291 const int z0= temp[2*0+i] + temp[2*2+i];
|
yading@10
|
292 const int z1= temp[2*0+i] - temp[2*2+i];
|
yading@10
|
293 const int z2= temp[2*1+i] - temp[2*3+i];
|
yading@10
|
294 const int z3= temp[2*1+i] + temp[2*3+i];
|
yading@10
|
295
|
yading@10
|
296 block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8;
|
yading@10
|
297 block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8;
|
yading@10
|
298 block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
|
yading@10
|
299 block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
|
yading@10
|
300 }
|
yading@10
|
301 }
|
yading@10
|
302
|
yading@10
|
303 void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
|
yading@10
|
304 const int stride= 16*2;
|
yading@10
|
305 const int xStride= 16;
|
yading@10
|
306 int a,b,c,d,e;
|
yading@10
|
307 dctcoef *block = (dctcoef*)_block;
|
yading@10
|
308
|
yading@10
|
309 a= block[stride*0 + xStride*0];
|
yading@10
|
310 b= block[stride*0 + xStride*1];
|
yading@10
|
311 c= block[stride*1 + xStride*0];
|
yading@10
|
312 d= block[stride*1 + xStride*1];
|
yading@10
|
313
|
yading@10
|
314 e= a-b;
|
yading@10
|
315 a= a+b;
|
yading@10
|
316 b= c-d;
|
yading@10
|
317 c= c+d;
|
yading@10
|
318
|
yading@10
|
319 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
|
yading@10
|
320 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
|
yading@10
|
321 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
|
yading@10
|
322 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
|
yading@10
|
323 }
|