h264idct_template.c
/*
 * H.264 IDCT
 * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 IDCT.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "bit_depth_template.c"
#include "libavutil/common.h"
#include "h264.h"

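/**
 * Inverse 4x4 transform of the residual in block, added to the prediction
 * already present in dst and clipped to the pixel range. A rounding offset
 * of 32 is folded into the DC coefficient before the two butterfly passes,
 * and the combined scaling of both passes is removed with a final >> 6.
 * The byte stride is converted to a stride in pixels, and block is zeroed
 * on return so it can be reused for the next transform.
 */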
void FUNCC(ff_h264_idct_add)(uint8_t *_dst, int16_t *_block, int stride)
{
    int i;
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    stride >>= sizeof(pixel)-1;

    block[0] += 1 << 5;

    for(i=0; i<4; i++){
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);

        block[i + 4*0]= z0 + z3;
        block[i + 4*1]= z1 + z2;
        block[i + 4*2]= z1 - z2;
        block[i + 4*3]= z0 - z3;
    }

    for(i=0; i<4; i++){
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);

        dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6));
        dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6));
        dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6));
        dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6));
    }

    memset(block, 0, 16 * sizeof(dctcoef));
}

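/**
 * Inverse 8x8 transform of the residual in block, added to the prediction
 * in dst with clipping, analogous to the 4x4 case: a rounding offset of 32
 * is added to the DC coefficient, rows and then columns are transformed
 * with the H.264 8x8 butterfly, and the results are scaled down by >> 6
 * before being added to dst. block is zeroed afterwards.
 */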
void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){
    int i;
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    stride >>= sizeof(pixel)-1;

    block[0] += 32;

    for( i = 0; i < 8; i++ )
    {
        const int a0 =  block[i+0*8] + block[i+4*8];
        const int a2 =  block[i+0*8] - block[i+4*8];
        const int a4 = (block[i+2*8]>>1) - block[i+6*8];
        const int a6 = (block[i+6*8]>>1) + block[i+2*8];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
        const int a3 =  block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
        const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
        const int a7 =  block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        block[i+0*8] = b0 + b7;
        block[i+7*8] = b0 - b7;
        block[i+1*8] = b2 + b5;
        block[i+6*8] = b2 - b5;
        block[i+2*8] = b4 + b3;
        block[i+5*8] = b4 - b3;
        block[i+3*8] = b6 + b1;
        block[i+4*8] = b6 - b1;
    }
    for( i = 0; i < 8; i++ )
    {
        const int a0 =  block[0+i*8] + block[4+i*8];
        const int a2 =  block[0+i*8] - block[4+i*8];
        const int a4 = (block[2+i*8]>>1) - block[6+i*8];
        const int a6 = (block[6+i*8]>>1) + block[2+i*8];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
        const int a3 =  block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
        const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
        const int a7 =  block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) );
        dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) );
        dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) );
        dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) );
        dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) );
        dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) );
        dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) );
        dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) );
    }

    memset(block, 0, 64 * sizeof(dctcoef));
}

/**
 * Special case of the 4x4 add for a block whose only non-zero coefficient
 * is the DC value: the reconstructed DC, (block[0] + 32) >> 6, is added to
 * all 16 pixels of the destination block with clipping, and block[0] is
 * cleared afterwards. Assumes all AC coefficients are 0.
 */
void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
    int i, j;
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    int dc = (block[0] + 32) >> 6;
    stride /= sizeof(pixel);
    block[0] = 0;
    for( j = 0; j < 4; j++ )
    {
        for( i = 0; i < 4; i++ )
            dst[i] = av_clip_pixel( dst[i] + dc );
        dst += stride;
    }
}

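/**
 * 8x8 variant of ff_h264_idct_dc_add: the reconstructed DC value is added
 * to all 64 pixels of the destination block with clipping, and block[0] is
 * cleared. Assumes all AC coefficients are 0.
 */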
void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
    int i, j;
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    int dc = (block[0] + 32) >> 6;
    block[0] = 0;
    stride /= sizeof(pixel);
    for( j = 0; j < 8; j++ )
    {
        for( i = 0; i < 8; i++ )
            dst[i] = av_clip_pixel( dst[i] + dc );
        dst += stride;
    }
}

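/**
 * Add the inverse transforms of the 16 luma 4x4 blocks of a macroblock to
 * dst. For each block, the non-zero-coefficient count (nnzc, indexed
 * through scan8) picks the fast DC-only path when the single coded
 * coefficient is the DC one, the full 4x4 IDCT otherwise, and skips the
 * block entirely when it carries no coded coefficients.
 */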
void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
    int i;
    for(i=0; i<16; i++){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
            else                                  FUNCC(ff_h264_idct_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
    }
}

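/**
 * Variant of ff_h264_idct_add16 for intra macroblocks: blocks with coded
 * coefficients get the full 4x4 IDCT, while blocks without coded
 * coefficients but with a non-zero DC value (reconstructed separately for
 * Intra16x16) use the DC-only path.
 */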
void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
    int i;
    for(i=0; i<16; i++){
        if(nnzc[ scan8[i] ])             FUNCC(ff_h264_idct_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
        else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
    }
}

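/**
 * Same dispatch as ff_h264_idct_add16, but for the four 8x8 luma blocks of
 * a macroblock coded with the 8x8 transform; i advances by 4 so that scan8
 * and block_offset address the top-left 4x4 unit of each 8x8 block.
 */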
void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
    int i;
    for(i=0; i<16; i+=4){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
            else                                  FUNCC(ff_h264_idct8_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
    }
}

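/**
 * Add the inverse transforms of the chroma 4x4 blocks of a 4:2:0
 * macroblock: four blocks per chroma plane (dest[0] and dest[1]), each
 * using the full IDCT when it has coded coefficients and the DC-only path
 * when only its DC value is non-zero.
 */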
void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
    int i, j;
    for(j=1; j<3; j++){
        for(i=j*16; i<j*16+4; i++){
            if(nnzc[ scan8[i] ])
                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
            else if(((dctcoef*)block)[i*16])
                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
    }
}

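/**
 * 4:2:2 version of ff_h264_idct_add8: each chroma plane carries eight 4x4
 * blocks instead of four, so a second pass handles the lower half of each
 * plane, using an index offset by 4 into scan8 and block_offset for those
 * blocks.
 */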
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
    int i, j;

    for(j=1; j<3; j++){
        for(i=j*16; i<j*16+4; i++){
            if(nnzc[ scan8[i] ])
                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
            else if(((dctcoef*)block)[i*16])
                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
    }

    for(j=1; j<3; j++){
        for(i=j*16+4; i<j*16+8; i++){
            if(nnzc[ scan8[i+4] ])
                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
            else if(((dctcoef*)block)[i*16])
                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
        }
    }
}

/**
 * Inverse-transform the 16 luma DC values with the 4x4 Hadamard transform
 * and dequantize them, scattering each result to the DC position of its
 * 4x4 block in the output coefficient buffer (hence the stride of 16 and
 * the x_offset table).
 * @param qmul dequantization multiplier derived from the quantization parameter
 */
void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul){
#define stride 16
    int i;
    int temp[16];
    static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
    dctcoef *input = (dctcoef*)_input;
    dctcoef *output = (dctcoef*)_output;

    for(i=0; i<4; i++){
        const int z0= input[4*i+0] + input[4*i+1];
        const int z1= input[4*i+0] - input[4*i+1];
        const int z2= input[4*i+2] - input[4*i+3];
        const int z3= input[4*i+2] + input[4*i+3];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z0-z3;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z1+z2;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8));
        output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
    }
#undef stride
}

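/**
 * Inverse-transform and dequantize the 2x4 chroma DC coefficients of a
 * 4:2:2 macroblock in place. The DC values sit at every 16th position of
 * the chroma coefficient buffer, so the transform walks the buffer with a
 * stride of 32 and an xStride of 16.
 */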
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
    const int stride= 16*2;
    const int xStride= 16;
    int i;
    int temp[8];
    static const uint8_t x_offset[2]={0, 16};
    dctcoef *block = (dctcoef*)_block;

    for(i=0; i<4; i++){
        temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
        temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
    }

    for(i=0; i<2; i++){
        const int offset= x_offset[i];
        const int z0= temp[2*0+i] + temp[2*2+i];
        const int z1= temp[2*0+i] - temp[2*2+i];
        const int z2= temp[2*1+i] - temp[2*3+i];
        const int z3= temp[2*1+i] + temp[2*3+i];

        block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8;
        block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8;
        block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
        block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
    }
}

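/**
 * Inverse-transform and dequantize the 2x2 chroma DC coefficients of a
 * 4:2:0 macroblock in place: a 2x2 Hadamard transform followed by
 * multiplication with qmul and a >> 7.
 */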
void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;
    dctcoef *block = (dctcoef*)_block;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
}