jrevdct.c
Go to the documentation of this file.
1 /*
2  * This file is part of the Independent JPEG Group's software.
3  *
4  * The authors make NO WARRANTY or representation, either express or implied,
5  * with respect to this software, its quality, accuracy, merchantability, or
6  * fitness for a particular purpose. This software is provided "AS IS", and
7  * you, its user, assume the entire risk as to its quality and accuracy.
8  *
9  * This software is copyright (C) 1991, 1992, Thomas G. Lane.
10  * All Rights Reserved except as specified below.
11  *
12  * Permission is hereby granted to use, copy, modify, and distribute this
13  * software (or portions thereof) for any purpose, without fee, subject to
14  * these conditions:
15  * (1) If any part of the source code for this software is distributed, then
16  * this README file must be included, with this copyright and no-warranty
17  * notice unaltered; and any additions, deletions, or changes to the original
18  * files must be clearly indicated in accompanying documentation.
19  * (2) If only executable code is distributed, then the accompanying
20  * documentation must state that "this software is based in part on the work
21  * of the Independent JPEG Group".
22  * (3) Permission for use of this software is granted only if the user accepts
23  * full responsibility for any undesirable consequences; the authors accept
24  * NO LIABILITY for damages of any kind.
25  *
26  * These conditions apply to any software derived from or based on the IJG
27  * code, not just to the unmodified library. If you use our work, you ought
28  * to acknowledge us.
29  *
30  * Permission is NOT granted for the use of any IJG author's name or company
31  * name in advertising or publicity relating to this software or products
32  * derived from it. This software may be referred to only as "the Independent
33  * JPEG Group's software".
34  *
35  * We specifically permit and encourage the use of this software as the basis
36  * of commercial products, provided that all warranty or liability claims are
37  * assumed by the product vendor.
38  *
39  * This file contains the basic inverse-DCT transformation subroutine.
40  *
41  * This implementation is based on an algorithm described in
42  * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
43  * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
44  * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
45  * The primary algorithm described there uses 11 multiplies and 29 adds.
46  * We use their alternate method with 12 multiplies and 32 adds.
47  * The advantage of this method is that no data path contains more than one
48  * multiplication; this allows a very simple and accurate implementation in
49  * scaled fixed-point arithmetic, with a minimal number of shifts.
50  *
51  * I've made lots of modifications to attempt to take advantage of the
52  * sparse nature of the DCT matrices we're getting. Although the logic
53  * is cumbersome, it's straightforward and the resulting code is much
54  * faster.
55  *
56  * A better way to do this would be to pass in the DCT block as a sparse
57  * matrix, perhaps with the different cases encoded.
58  */
59 
60 /**
61  * @file
62  * Independent JPEG Group's LLM (Loeffler-Ligtenberg-Moschytz) inverse DCT.
63  */
64 
65 #include "libavutil/common.h"
66 #include "dct.h"
67 
/* Build configuration: samples are 8 bits wide. This selects PASS1_BITS and
 * gates the optional 16-bit MULTIPLY variants defined later in this file. */
#define EIGHT_BIT_SAMPLES

#define DCTSIZE  8   /* number of rows/columns handled by the 8x8 routine */
#define DCTSIZE2 64  /* DCTSIZE squared: number of coefficients in a block */

#define GLOBAL       /* expands to nothing */

/* Plain right shift. DESCALE assumes this rounds towards minus infinity
 * when the left operand is negative. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))

/* One block of DCTSIZE2 16-bit coefficients. */
typedef int16_t DCTBLOCK[DCTSIZE2];

/* Number of fractional bits kept in the fixed-point multiplier constants. */
#define CONST_BITS 13

/*
 * This routine is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
88 
89 
90 /*
91  * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
92  * on each column. Direct algorithms are also available, but they are
93  * much more complex and seem not to be any faster when reduced to code.
94  *
95  * The details of this scaling scheme are as follows:
96  *
97  * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
98  * larger than the true IDCT outputs. The final outputs are therefore
99  * a factor of N larger than desired; since N=8 this can be cured by
100  * a simple right shift at the end of the algorithm. The advantage of
101  * this arrangement is that we save two multiplications per 1-D IDCT,
102  * because the y0 and y4 inputs need not be divided by sqrt(N).
103  *
104  * We have to do addition and subtraction of the integer inputs, which
105  * is no problem, and multiplication by fractional constants, which is
106  * a problem to do in integer arithmetic. We multiply all the constants
107  * by CONST_SCALE and convert them to integer constants (thus retaining
108  * CONST_BITS bits of precision in the constants). After doing a
109  * multiplication we have to divide the product by CONST_SCALE, with proper
110  * rounding, to produce the correct output. This division can be done
111  * cheaply as a right shift of CONST_BITS bits. We postpone shifting
112  * as long as possible so that partial sums can be added together with
113  * full fractional precision.
114  *
115  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
116  * they are represented to better-than-integral precision. These outputs
117  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
118  * with the recommended scaling. (To scale up 12-bit sample data further, an
119  * intermediate int32 array would be needed.)
120  *
121  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
122  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
123  * shows that the values given below are the most effective.
124  */
125 
/* Number of extra fractional bits carried by the pass 1 outputs. */
#ifdef EIGHT_BIT_SAMPLES
#define PASS1_BITS 2
#else
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif

/* The value 1 as a 32-bit integer, so that shifts of it are done in int32_t. */
#define ONE ((int32_t) 1)

/* Scale factor applied to the fractional multiplier constants. */
#define CONST_SCALE (ONE << CONST_BITS)

/* Convert a positive real constant to an integer scaled by CONST_SCALE.
 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
 * you will pay a significant penalty in run time. In that case, figure
 * the correct integer constant values and insert them by hand.
 *
 * FIX is no longer used in this file: all constants are precomputed as the
 * FIX_* macros.
 */
#define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5))

/* Descale and correctly round an int32_t value that's scaled by N bits.
 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 * the fudge factor (half of the divisor) is correct for either sign of X.
 */
#define DESCALE(x, n) RIGHT_SHIFT((x) + (ONE << ((n) - 1)), n)

/* Multiply an int32_t variable by an int32_t constant to yield an int32_t
 * result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
 * this provides a useful speedup on many machines.
 * There is no way to specify a 16x16->32 multiply in portable C, but
 * some C compilers will do the right thing if you provide the correct
 * combination of casts.
 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * Exactly one definition is selected; if both SHORTxSHORT_32 and
 * SHORTxLCONST_32 are defined, SHORTxSHORT_32 takes precedence.
 */
#if defined(EIGHT_BIT_SAMPLES) && defined(SHORTxSHORT_32)
/* may work if 'int' is 32 bits */
#define MULTIPLY(var, cst) (((int16_t) (var)) * ((int16_t) (cst)))
#elif defined(EIGHT_BIT_SAMPLES) && defined(SHORTxLCONST_32)
/* known to work with Microsoft C 6.0 */
#define MULTIPLY(var, cst) (((int16_t) (var)) * ((int32_t) (cst)))
#else
/* default definition: ordinary full-width multiply */
#define MULTIPLY(var, cst) ((var) * (cst))
#endif
175 
176 
/*
 * Precomputed multiplier constants. Each FIX_a_bbbbbbbbb is the real value
 * a.bbbbbbbbb scaled by 2^13 and rounded to the nearest integer.
 *
 * Unlike our decoder, where we approximate the FIXes, we need to use exact
 * ones here, or successive P-frames will drift too much with reference
 * frame coding.
 */
#define FIX_0_211164243 1730
#define FIX_0_275899380 2260
#define FIX_0_298631336 2446
#define FIX_0_390180644 3196
#define FIX_0_509795579 4176
#define FIX_0_541196100 4433
#define FIX_0_601344887 4926
#define FIX_0_765366865 6270
#define FIX_0_785694958 6436
#define FIX_0_899976223 7373
#define FIX_1_061594337 8697
#define FIX_1_111140466 9102
#define FIX_1_175875602 9633
#define FIX_1_306562965 10703
#define FIX_1_387039845 11363
#define FIX_1_451774981 11893
#define FIX_1_501321110 12299
#define FIX_1_662939225 13623
#define FIX_1_847759065 15137
#define FIX_1_961570560 16069
#define FIX_2_053119869 16819
#define FIX_2_172734803 17799
#define FIX_2_562915447 20995
#define FIX_3_072711026 25172
205 
206 /*
207  * Perform the inverse DCT on one block of coefficients.
208  */
209 
211 {
212  int32_t tmp0, tmp1, tmp2, tmp3;
213  int32_t tmp10, tmp11, tmp12, tmp13;
214  int32_t z1, z2, z3, z4, z5;
215  int32_t d0, d1, d2, d3, d4, d5, d6, d7;
216  register int16_t *dataptr;
217  int rowctr;
218 
219  /* Pass 1: process rows. */
220  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
221  /* furthermore, we scale the results by 2**PASS1_BITS. */
222 
223  dataptr = data;
224 
225  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
226  /* Due to quantization, we will usually find that many of the input
227  * coefficients are zero, especially the AC terms. We can exploit this
228  * by short-circuiting the IDCT calculation for any row in which all
229  * the AC terms are zero. In that case each output is equal to the
230  * DC coefficient (with scale factor as needed).
231  * With typical images and quantization tables, half or more of the
232  * row DCT calculations can be simplified this way.
233  */
234 
235  register int *idataptr = (int*)dataptr;
236 
237  /* WARNING: we do the same permutation as MMX idct to simplify the
238  video core */
239  d0 = dataptr[0];
240  d2 = dataptr[1];
241  d4 = dataptr[2];
242  d6 = dataptr[3];
243  d1 = dataptr[4];
244  d3 = dataptr[5];
245  d5 = dataptr[6];
246  d7 = dataptr[7];
247 
248  if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
249  /* AC terms all zero */
250  if (d0) {
251  /* Compute a 32 bit value to assign. */
252  int16_t dcval = (int16_t) (d0 << PASS1_BITS);
253  register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
254 
255  idataptr[0] = v;
256  idataptr[1] = v;
257  idataptr[2] = v;
258  idataptr[3] = v;
259  }
260 
261  dataptr += DCTSIZE; /* advance pointer to next row */
262  continue;
263  }
264 
265  /* Even part: reverse the even part of the forward DCT. */
266  /* The rotator is sqrt(2)*c(-6). */
267 {
268  if (d6) {
269  if (d2) {
270  /* d2 != 0, d6 != 0 (d0 and d4 are not tested here) */
271  z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
272  tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
273  tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
274 
275  tmp0 = (d0 + d4) << CONST_BITS;
276  tmp1 = (d0 - d4) << CONST_BITS;
277 
278  tmp10 = tmp0 + tmp3;
279  tmp13 = tmp0 - tmp3;
280  tmp11 = tmp1 + tmp2;
281  tmp12 = tmp1 - tmp2;
282  } else {
283  /* d2 == 0, d6 != 0 (d0 and d4 are not tested here) */
284  tmp2 = MULTIPLY(-d6, FIX_1_306562965);
285  tmp3 = MULTIPLY(d6, FIX_0_541196100);
286 
287  tmp0 = (d0 + d4) << CONST_BITS;
288  tmp1 = (d0 - d4) << CONST_BITS;
289 
290  tmp10 = tmp0 + tmp3;
291  tmp13 = tmp0 - tmp3;
292  tmp11 = tmp1 + tmp2;
293  tmp12 = tmp1 - tmp2;
294  }
295  } else {
296  if (d2) {
297  /* d2 != 0, d6 == 0 (d0 and d4 are not tested here) */
298  tmp2 = MULTIPLY(d2, FIX_0_541196100);
299  tmp3 = MULTIPLY(d2, FIX_1_306562965);
300 
301  tmp0 = (d0 + d4) << CONST_BITS;
302  tmp1 = (d0 - d4) << CONST_BITS;
303 
304  tmp10 = tmp0 + tmp3;
305  tmp13 = tmp0 - tmp3;
306  tmp11 = tmp1 + tmp2;
307  tmp12 = tmp1 - tmp2;
308  } else {
309  /* d2 == 0, d6 == 0 (d0 and d4 are not tested here) */
310  tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
311  tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
312  }
313  }
314 
315  /* Odd part per figure 8; the matrix is unitary and hence its
316  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
317  */
318 
319  if (d7) {
320  if (d5) {
321  if (d3) {
322  if (d1) {
323  /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
324  z1 = d7 + d1;
325  z2 = d5 + d3;
326  z3 = d7 + d3;
327  z4 = d5 + d1;
328  z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
329 
330  tmp0 = MULTIPLY(d7, FIX_0_298631336);
331  tmp1 = MULTIPLY(d5, FIX_2_053119869);
332  tmp2 = MULTIPLY(d3, FIX_3_072711026);
333  tmp3 = MULTIPLY(d1, FIX_1_501321110);
334  z1 = MULTIPLY(-z1, FIX_0_899976223);
335  z2 = MULTIPLY(-z2, FIX_2_562915447);
336  z3 = MULTIPLY(-z3, FIX_1_961570560);
337  z4 = MULTIPLY(-z4, FIX_0_390180644);
338 
339  z3 += z5;
340  z4 += z5;
341 
342  tmp0 += z1 + z3;
343  tmp1 += z2 + z4;
344  tmp2 += z2 + z3;
345  tmp3 += z1 + z4;
346  } else {
347  /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
348  z2 = d5 + d3;
349  z3 = d7 + d3;
350  z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
351 
352  tmp0 = MULTIPLY(d7, FIX_0_298631336);
353  tmp1 = MULTIPLY(d5, FIX_2_053119869);
354  tmp2 = MULTIPLY(d3, FIX_3_072711026);
355  z1 = MULTIPLY(-d7, FIX_0_899976223);
356  z2 = MULTIPLY(-z2, FIX_2_562915447);
357  z3 = MULTIPLY(-z3, FIX_1_961570560);
358  z4 = MULTIPLY(-d5, FIX_0_390180644);
359 
360  z3 += z5;
361  z4 += z5;
362 
363  tmp0 += z1 + z3;
364  tmp1 += z2 + z4;
365  tmp2 += z2 + z3;
366  tmp3 = z1 + z4;
367  }
368  } else {
369  if (d1) {
370  /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
371  z1 = d7 + d1;
372  z4 = d5 + d1;
373  z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
374 
375  tmp0 = MULTIPLY(d7, FIX_0_298631336);
376  tmp1 = MULTIPLY(d5, FIX_2_053119869);
377  tmp3 = MULTIPLY(d1, FIX_1_501321110);
378  z1 = MULTIPLY(-z1, FIX_0_899976223);
379  z2 = MULTIPLY(-d5, FIX_2_562915447);
380  z3 = MULTIPLY(-d7, FIX_1_961570560);
381  z4 = MULTIPLY(-z4, FIX_0_390180644);
382 
383  z3 += z5;
384  z4 += z5;
385 
386  tmp0 += z1 + z3;
387  tmp1 += z2 + z4;
388  tmp2 = z2 + z3;
389  tmp3 += z1 + z4;
390  } else {
391  /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
392  tmp0 = MULTIPLY(-d7, FIX_0_601344887);
393  z1 = MULTIPLY(-d7, FIX_0_899976223);
394  z3 = MULTIPLY(-d7, FIX_1_961570560);
395  tmp1 = MULTIPLY(-d5, FIX_0_509795579);
396  z2 = MULTIPLY(-d5, FIX_2_562915447);
397  z4 = MULTIPLY(-d5, FIX_0_390180644);
398  z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
399 
400  z3 += z5;
401  z4 += z5;
402 
403  tmp0 += z3;
404  tmp1 += z4;
405  tmp2 = z2 + z3;
406  tmp3 = z1 + z4;
407  }
408  }
409  } else {
410  if (d3) {
411  if (d1) {
412  /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
413  z1 = d7 + d1;
414  z3 = d7 + d3;
415  z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
416 
417  tmp0 = MULTIPLY(d7, FIX_0_298631336);
418  tmp2 = MULTIPLY(d3, FIX_3_072711026);
419  tmp3 = MULTIPLY(d1, FIX_1_501321110);
420  z1 = MULTIPLY(-z1, FIX_0_899976223);
421  z2 = MULTIPLY(-d3, FIX_2_562915447);
422  z3 = MULTIPLY(-z3, FIX_1_961570560);
423  z4 = MULTIPLY(-d1, FIX_0_390180644);
424 
425  z3 += z5;
426  z4 += z5;
427 
428  tmp0 += z1 + z3;
429  tmp1 = z2 + z4;
430  tmp2 += z2 + z3;
431  tmp3 += z1 + z4;
432  } else {
433  /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
434  z3 = d7 + d3;
435 
436  tmp0 = MULTIPLY(-d7, FIX_0_601344887);
437  z1 = MULTIPLY(-d7, FIX_0_899976223);
438  tmp2 = MULTIPLY(d3, FIX_0_509795579);
439  z2 = MULTIPLY(-d3, FIX_2_562915447);
440  z5 = MULTIPLY(z3, FIX_1_175875602);
441  z3 = MULTIPLY(-z3, FIX_0_785694958);
442 
443  tmp0 += z3;
444  tmp1 = z2 + z5;
445  tmp2 += z3;
446  tmp3 = z1 + z5;
447  }
448  } else {
449  if (d1) {
450  /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
451  z1 = d7 + d1;
452  z5 = MULTIPLY(z1, FIX_1_175875602);
453 
454  z1 = MULTIPLY(z1, FIX_0_275899380);
455  z3 = MULTIPLY(-d7, FIX_1_961570560);
456  tmp0 = MULTIPLY(-d7, FIX_1_662939225);
457  z4 = MULTIPLY(-d1, FIX_0_390180644);
458  tmp3 = MULTIPLY(d1, FIX_1_111140466);
459 
460  tmp0 += z1;
461  tmp1 = z4 + z5;
462  tmp2 = z3 + z5;
463  tmp3 += z1;
464  } else {
465  /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
466  tmp0 = MULTIPLY(-d7, FIX_1_387039845);
467  tmp1 = MULTIPLY(d7, FIX_1_175875602);
468  tmp2 = MULTIPLY(-d7, FIX_0_785694958);
469  tmp3 = MULTIPLY(d7, FIX_0_275899380);
470  }
471  }
472  }
473  } else {
474  if (d5) {
475  if (d3) {
476  if (d1) {
477  /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
478  z2 = d5 + d3;
479  z4 = d5 + d1;
480  z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
481 
482  tmp1 = MULTIPLY(d5, FIX_2_053119869);
483  tmp2 = MULTIPLY(d3, FIX_3_072711026);
484  tmp3 = MULTIPLY(d1, FIX_1_501321110);
485  z1 = MULTIPLY(-d1, FIX_0_899976223);
486  z2 = MULTIPLY(-z2, FIX_2_562915447);
487  z3 = MULTIPLY(-d3, FIX_1_961570560);
488  z4 = MULTIPLY(-z4, FIX_0_390180644);
489 
490  z3 += z5;
491  z4 += z5;
492 
493  tmp0 = z1 + z3;
494  tmp1 += z2 + z4;
495  tmp2 += z2 + z3;
496  tmp3 += z1 + z4;
497  } else {
498  /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
499  z2 = d5 + d3;
500 
501  z5 = MULTIPLY(z2, FIX_1_175875602);
502  tmp1 = MULTIPLY(d5, FIX_1_662939225);
503  z4 = MULTIPLY(-d5, FIX_0_390180644);
504  z2 = MULTIPLY(-z2, FIX_1_387039845);
505  tmp2 = MULTIPLY(d3, FIX_1_111140466);
506  z3 = MULTIPLY(-d3, FIX_1_961570560);
507 
508  tmp0 = z3 + z5;
509  tmp1 += z2;
510  tmp2 += z2;
511  tmp3 = z4 + z5;
512  }
513  } else {
514  if (d1) {
515  /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
516  z4 = d5 + d1;
517 
518  z5 = MULTIPLY(z4, FIX_1_175875602);
519  z1 = MULTIPLY(-d1, FIX_0_899976223);
520  tmp3 = MULTIPLY(d1, FIX_0_601344887);
521  tmp1 = MULTIPLY(-d5, FIX_0_509795579);
522  z2 = MULTIPLY(-d5, FIX_2_562915447);
523  z4 = MULTIPLY(z4, FIX_0_785694958);
524 
525  tmp0 = z1 + z5;
526  tmp1 += z4;
527  tmp2 = z2 + z5;
528  tmp3 += z4;
529  } else {
530  /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
531  tmp0 = MULTIPLY(d5, FIX_1_175875602);
532  tmp1 = MULTIPLY(d5, FIX_0_275899380);
533  tmp2 = MULTIPLY(-d5, FIX_1_387039845);
534  tmp3 = MULTIPLY(d5, FIX_0_785694958);
535  }
536  }
537  } else {
538  if (d3) {
539  if (d1) {
540  /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
541  z5 = d1 + d3;
542  tmp3 = MULTIPLY(d1, FIX_0_211164243);
543  tmp2 = MULTIPLY(-d3, FIX_1_451774981);
544  z1 = MULTIPLY(d1, FIX_1_061594337);
545  z2 = MULTIPLY(-d3, FIX_2_172734803);
546  z4 = MULTIPLY(z5, FIX_0_785694958);
547  z5 = MULTIPLY(z5, FIX_1_175875602);
548 
549  tmp0 = z1 - z4;
550  tmp1 = z2 + z4;
551  tmp2 += z5;
552  tmp3 += z5;
553  } else {
554  /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
555  tmp0 = MULTIPLY(-d3, FIX_0_785694958);
556  tmp1 = MULTIPLY(-d3, FIX_1_387039845);
557  tmp2 = MULTIPLY(-d3, FIX_0_275899380);
558  tmp3 = MULTIPLY(d3, FIX_1_175875602);
559  }
560  } else {
561  if (d1) {
562  /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
563  tmp0 = MULTIPLY(d1, FIX_0_275899380);
564  tmp1 = MULTIPLY(d1, FIX_0_785694958);
565  tmp2 = MULTIPLY(d1, FIX_1_175875602);
566  tmp3 = MULTIPLY(d1, FIX_1_387039845);
567  } else {
568  /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
569  tmp0 = tmp1 = tmp2 = tmp3 = 0;
570  }
571  }
572  }
573  }
574 }
575  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
576 
577  dataptr[0] = (int16_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
578  dataptr[7] = (int16_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
579  dataptr[1] = (int16_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
580  dataptr[6] = (int16_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
581  dataptr[2] = (int16_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
582  dataptr[5] = (int16_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
583  dataptr[3] = (int16_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
584  dataptr[4] = (int16_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
585 
586  dataptr += DCTSIZE; /* advance pointer to next row */
587  }
588 
589  /* Pass 2: process columns. */
590  /* Note that we must descale the results by a factor of 8 == 2**3, */
591  /* and also undo the PASS1_BITS scaling. */
592 
593  dataptr = data;
594  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
595  /* Columns of zeroes can be exploited in the same way as we did with rows.
596  * However, the row calculation has created many nonzero AC terms, so the
597  * simplification applies less often (typically 5% to 10% of the time).
598  * On machines with very fast multiplication, it's possible that the
599  * test takes more time than it's worth. In that case this section
600  * may be commented out.
601  */
602 
603  d0 = dataptr[DCTSIZE*0];
604  d1 = dataptr[DCTSIZE*1];
605  d2 = dataptr[DCTSIZE*2];
606  d3 = dataptr[DCTSIZE*3];
607  d4 = dataptr[DCTSIZE*4];
608  d5 = dataptr[DCTSIZE*5];
609  d6 = dataptr[DCTSIZE*6];
610  d7 = dataptr[DCTSIZE*7];
611 
612  /* Even part: reverse the even part of the forward DCT. */
613  /* The rotator is sqrt(2)*c(-6). */
614  if (d6) {
615  if (d2) {
616  /* d2 != 0, d6 != 0 (d0 and d4 are not tested here) */
617  z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
618  tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
619  tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
620 
621  tmp0 = (d0 + d4) << CONST_BITS;
622  tmp1 = (d0 - d4) << CONST_BITS;
623 
624  tmp10 = tmp0 + tmp3;
625  tmp13 = tmp0 - tmp3;
626  tmp11 = tmp1 + tmp2;
627  tmp12 = tmp1 - tmp2;
628  } else {
629  /* d2 == 0, d6 != 0 (d0 and d4 are not tested here) */
630  tmp2 = MULTIPLY(-d6, FIX_1_306562965);
631  tmp3 = MULTIPLY(d6, FIX_0_541196100);
632 
633  tmp0 = (d0 + d4) << CONST_BITS;
634  tmp1 = (d0 - d4) << CONST_BITS;
635 
636  tmp10 = tmp0 + tmp3;
637  tmp13 = tmp0 - tmp3;
638  tmp11 = tmp1 + tmp2;
639  tmp12 = tmp1 - tmp2;
640  }
641  } else {
642  if (d2) {
643  /* d2 != 0, d6 == 0 (d0 and d4 are not tested here) */
644  tmp2 = MULTIPLY(d2, FIX_0_541196100);
645  tmp3 = MULTIPLY(d2, FIX_1_306562965);
646 
647  tmp0 = (d0 + d4) << CONST_BITS;
648  tmp1 = (d0 - d4) << CONST_BITS;
649 
650  tmp10 = tmp0 + tmp3;
651  tmp13 = tmp0 - tmp3;
652  tmp11 = tmp1 + tmp2;
653  tmp12 = tmp1 - tmp2;
654  } else {
655  /* d2 == 0, d6 == 0 (d0 and d4 are not tested here) */
656  tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
657  tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
658  }
659  }
660 
661  /* Odd part per figure 8; the matrix is unitary and hence its
662  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
663  */
664  if (d7) {
665  if (d5) {
666  if (d3) {
667  if (d1) {
668  /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
669  z1 = d7 + d1;
670  z2 = d5 + d3;
671  z3 = d7 + d3;
672  z4 = d5 + d1;
673  z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
674 
675  tmp0 = MULTIPLY(d7, FIX_0_298631336);
676  tmp1 = MULTIPLY(d5, FIX_2_053119869);
677  tmp2 = MULTIPLY(d3, FIX_3_072711026);
678  tmp3 = MULTIPLY(d1, FIX_1_501321110);
679  z1 = MULTIPLY(-z1, FIX_0_899976223);
680  z2 = MULTIPLY(-z2, FIX_2_562915447);
681  z3 = MULTIPLY(-z3, FIX_1_961570560);
682  z4 = MULTIPLY(-z4, FIX_0_390180644);
683 
684  z3 += z5;
685  z4 += z5;
686 
687  tmp0 += z1 + z3;
688  tmp1 += z2 + z4;
689  tmp2 += z2 + z3;
690  tmp3 += z1 + z4;
691  } else {
692  /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
693  z2 = d5 + d3;
694  z3 = d7 + d3;
695  z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
696 
697  tmp0 = MULTIPLY(d7, FIX_0_298631336);
698  tmp1 = MULTIPLY(d5, FIX_2_053119869);
699  tmp2 = MULTIPLY(d3, FIX_3_072711026);
700  z1 = MULTIPLY(-d7, FIX_0_899976223);
701  z2 = MULTIPLY(-z2, FIX_2_562915447);
702  z3 = MULTIPLY(-z3, FIX_1_961570560);
703  z4 = MULTIPLY(-d5, FIX_0_390180644);
704 
705  z3 += z5;
706  z4 += z5;
707 
708  tmp0 += z1 + z3;
709  tmp1 += z2 + z4;
710  tmp2 += z2 + z3;
711  tmp3 = z1 + z4;
712  }
713  } else {
714  if (d1) {
715  /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
716  z1 = d7 + d1;
717  z3 = d7;
718  z4 = d5 + d1;
719  z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
720 
721  tmp0 = MULTIPLY(d7, FIX_0_298631336);
722  tmp1 = MULTIPLY(d5, FIX_2_053119869);
723  tmp3 = MULTIPLY(d1, FIX_1_501321110);
724  z1 = MULTIPLY(-z1, FIX_0_899976223);
725  z2 = MULTIPLY(-d5, FIX_2_562915447);
726  z3 = MULTIPLY(-d7, FIX_1_961570560);
727  z4 = MULTIPLY(-z4, FIX_0_390180644);
728 
729  z3 += z5;
730  z4 += z5;
731 
732  tmp0 += z1 + z3;
733  tmp1 += z2 + z4;
734  tmp2 = z2 + z3;
735  tmp3 += z1 + z4;
736  } else {
737  /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
738  tmp0 = MULTIPLY(-d7, FIX_0_601344887);
739  z1 = MULTIPLY(-d7, FIX_0_899976223);
740  z3 = MULTIPLY(-d7, FIX_1_961570560);
741  tmp1 = MULTIPLY(-d5, FIX_0_509795579);
742  z2 = MULTIPLY(-d5, FIX_2_562915447);
743  z4 = MULTIPLY(-d5, FIX_0_390180644);
744  z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
745 
746  z3 += z5;
747  z4 += z5;
748 
749  tmp0 += z3;
750  tmp1 += z4;
751  tmp2 = z2 + z3;
752  tmp3 = z1 + z4;
753  }
754  }
755  } else {
756  if (d3) {
757  if (d1) {
758  /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
759  z1 = d7 + d1;
760  z3 = d7 + d3;
761  z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
762 
763  tmp0 = MULTIPLY(d7, FIX_0_298631336);
764  tmp2 = MULTIPLY(d3, FIX_3_072711026);
765  tmp3 = MULTIPLY(d1, FIX_1_501321110);
766  z1 = MULTIPLY(-z1, FIX_0_899976223);
767  z2 = MULTIPLY(-d3, FIX_2_562915447);
768  z3 = MULTIPLY(-z3, FIX_1_961570560);
769  z4 = MULTIPLY(-d1, FIX_0_390180644);
770 
771  z3 += z5;
772  z4 += z5;
773 
774  tmp0 += z1 + z3;
775  tmp1 = z2 + z4;
776  tmp2 += z2 + z3;
777  tmp3 += z1 + z4;
778  } else {
779  /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
780  z3 = d7 + d3;
781 
782  tmp0 = MULTIPLY(-d7, FIX_0_601344887);
783  z1 = MULTIPLY(-d7, FIX_0_899976223);
784  tmp2 = MULTIPLY(d3, FIX_0_509795579);
785  z2 = MULTIPLY(-d3, FIX_2_562915447);
786  z5 = MULTIPLY(z3, FIX_1_175875602);
787  z3 = MULTIPLY(-z3, FIX_0_785694958);
788 
789  tmp0 += z3;
790  tmp1 = z2 + z5;
791  tmp2 += z3;
792  tmp3 = z1 + z5;
793  }
794  } else {
795  if (d1) {
796  /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
797  z1 = d7 + d1;
798  z5 = MULTIPLY(z1, FIX_1_175875602);
799 
800  z1 = MULTIPLY(z1, FIX_0_275899380);
801  z3 = MULTIPLY(-d7, FIX_1_961570560);
802  tmp0 = MULTIPLY(-d7, FIX_1_662939225);
803  z4 = MULTIPLY(-d1, FIX_0_390180644);
804  tmp3 = MULTIPLY(d1, FIX_1_111140466);
805 
806  tmp0 += z1;
807  tmp1 = z4 + z5;
808  tmp2 = z3 + z5;
809  tmp3 += z1;
810  } else {
811  /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
812  tmp0 = MULTIPLY(-d7, FIX_1_387039845);
813  tmp1 = MULTIPLY(d7, FIX_1_175875602);
814  tmp2 = MULTIPLY(-d7, FIX_0_785694958);
815  tmp3 = MULTIPLY(d7, FIX_0_275899380);
816  }
817  }
818  }
819  } else {
820  if (d5) {
821  if (d3) {
822  if (d1) {
823  /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
824  z2 = d5 + d3;
825  z4 = d5 + d1;
826  z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
827 
828  tmp1 = MULTIPLY(d5, FIX_2_053119869);
829  tmp2 = MULTIPLY(d3, FIX_3_072711026);
830  tmp3 = MULTIPLY(d1, FIX_1_501321110);
831  z1 = MULTIPLY(-d1, FIX_0_899976223);
832  z2 = MULTIPLY(-z2, FIX_2_562915447);
833  z3 = MULTIPLY(-d3, FIX_1_961570560);
834  z4 = MULTIPLY(-z4, FIX_0_390180644);
835 
836  z3 += z5;
837  z4 += z5;
838 
839  tmp0 = z1 + z3;
840  tmp1 += z2 + z4;
841  tmp2 += z2 + z3;
842  tmp3 += z1 + z4;
843  } else {
844  /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
845  z2 = d5 + d3;
846 
847  z5 = MULTIPLY(z2, FIX_1_175875602);
848  tmp1 = MULTIPLY(d5, FIX_1_662939225);
849  z4 = MULTIPLY(-d5, FIX_0_390180644);
850  z2 = MULTIPLY(-z2, FIX_1_387039845);
851  tmp2 = MULTIPLY(d3, FIX_1_111140466);
852  z3 = MULTIPLY(-d3, FIX_1_961570560);
853 
854  tmp0 = z3 + z5;
855  tmp1 += z2;
856  tmp2 += z2;
857  tmp3 = z4 + z5;
858  }
859  } else {
860  if (d1) {
861  /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
862  z4 = d5 + d1;
863 
864  z5 = MULTIPLY(z4, FIX_1_175875602);
865  z1 = MULTIPLY(-d1, FIX_0_899976223);
866  tmp3 = MULTIPLY(d1, FIX_0_601344887);
867  tmp1 = MULTIPLY(-d5, FIX_0_509795579);
868  z2 = MULTIPLY(-d5, FIX_2_562915447);
869  z4 = MULTIPLY(z4, FIX_0_785694958);
870 
871  tmp0 = z1 + z5;
872  tmp1 += z4;
873  tmp2 = z2 + z5;
874  tmp3 += z4;
875  } else {
876  /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
877  tmp0 = MULTIPLY(d5, FIX_1_175875602);
878  tmp1 = MULTIPLY(d5, FIX_0_275899380);
879  tmp2 = MULTIPLY(-d5, FIX_1_387039845);
880  tmp3 = MULTIPLY(d5, FIX_0_785694958);
881  }
882  }
883  } else {
884  if (d3) {
885  if (d1) {
886  /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
887  z5 = d1 + d3;
888  tmp3 = MULTIPLY(d1, FIX_0_211164243);
889  tmp2 = MULTIPLY(-d3, FIX_1_451774981);
890  z1 = MULTIPLY(d1, FIX_1_061594337);
891  z2 = MULTIPLY(-d3, FIX_2_172734803);
892  z4 = MULTIPLY(z5, FIX_0_785694958);
893  z5 = MULTIPLY(z5, FIX_1_175875602);
894 
895  tmp0 = z1 - z4;
896  tmp1 = z2 + z4;
897  tmp2 += z5;
898  tmp3 += z5;
899  } else {
900  /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
901  tmp0 = MULTIPLY(-d3, FIX_0_785694958);
902  tmp1 = MULTIPLY(-d3, FIX_1_387039845);
903  tmp2 = MULTIPLY(-d3, FIX_0_275899380);
904  tmp3 = MULTIPLY(d3, FIX_1_175875602);
905  }
906  } else {
907  if (d1) {
908  /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
909  tmp0 = MULTIPLY(d1, FIX_0_275899380);
910  tmp1 = MULTIPLY(d1, FIX_0_785694958);
911  tmp2 = MULTIPLY(d1, FIX_1_175875602);
912  tmp3 = MULTIPLY(d1, FIX_1_387039845);
913  } else {
914  /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
915  tmp0 = tmp1 = tmp2 = tmp3 = 0;
916  }
917  }
918  }
919  }
920 
921  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
922 
923  dataptr[DCTSIZE*0] = (int16_t) DESCALE(tmp10 + tmp3,
925  dataptr[DCTSIZE*7] = (int16_t) DESCALE(tmp10 - tmp3,
927  dataptr[DCTSIZE*1] = (int16_t) DESCALE(tmp11 + tmp2,
929  dataptr[DCTSIZE*6] = (int16_t) DESCALE(tmp11 - tmp2,
931  dataptr[DCTSIZE*2] = (int16_t) DESCALE(tmp12 + tmp1,
933  dataptr[DCTSIZE*5] = (int16_t) DESCALE(tmp12 - tmp1,
935  dataptr[DCTSIZE*3] = (int16_t) DESCALE(tmp13 + tmp0,
937  dataptr[DCTSIZE*4] = (int16_t) DESCALE(tmp13 - tmp0,
939 
940  dataptr++; /* advance pointer to next column */
941  }
942 }
943 
/*
 * Reconfigure the size macros for the reduced routine that follows:
 * its loops run DCTSIZE (now 4) times and it reads 4 coefficients per row,
 * while consecutive rows are still DCTSTRIDE (8) elements apart.
 */
#undef DCTSIZE
#define DCTSIZE 4
#define DCTSTRIDE 8
947 
949 {
950  int32_t tmp0, tmp1, tmp2, tmp3;
951  int32_t tmp10, tmp11, tmp12, tmp13;
952  int32_t z1;
953  int32_t d0, d2, d4, d6;
954  register int16_t *dataptr;
955  int rowctr;
956 
957  /* Pass 1: process rows. */
958  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
959  /* furthermore, we scale the results by 2**PASS1_BITS. */
960 
961  data[0] += 4;
962 
963  dataptr = data;
964 
965  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
966  /* Due to quantization, we will usually find that many of the input
967  * coefficients are zero, especially the AC terms. We can exploit this
968  * by short-circuiting the IDCT calculation for any row in which all
969  * the AC terms are zero. In that case each output is equal to the
970  * DC coefficient (with scale factor as needed).
971  * With typical images and quantization tables, half or more of the
972  * row DCT calculations can be simplified this way.
973  */
974 
975  register int *idataptr = (int*)dataptr;
976 
977  d0 = dataptr[0];
978  d2 = dataptr[1];
979  d4 = dataptr[2];
980  d6 = dataptr[3];
981 
982  if ((d2 | d4 | d6) == 0) {
983  /* AC terms all zero */
984  if (d0) {
985  /* Compute a 32 bit value to assign. */
986  int16_t dcval = (int16_t) (d0 << PASS1_BITS);
987  register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
988 
989  idataptr[0] = v;
990  idataptr[1] = v;
991  }
992 
993  dataptr += DCTSTRIDE; /* advance pointer to next row */
994  continue;
995  }
996 
997  /* Even part: reverse the even part of the forward DCT. */
998  /* The rotator is sqrt(2)*c(-6). */
999  if (d6) {
1000  if (d2) {
1001  /* d2 != 0, d6 != 0 (d0 and d4 are not tested here) */
1002  z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1003  tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1004  tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1005 
1006  tmp0 = (d0 + d4) << CONST_BITS;
1007  tmp1 = (d0 - d4) << CONST_BITS;
1008 
1009  tmp10 = tmp0 + tmp3;
1010  tmp13 = tmp0 - tmp3;
1011  tmp11 = tmp1 + tmp2;
1012  tmp12 = tmp1 - tmp2;
1013  } else {
1014  /* d2 == 0, d6 != 0 (d0 and d4 are not tested here) */
1015  tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1016  tmp3 = MULTIPLY(d6, FIX_0_541196100);
1017 
1018  tmp0 = (d0 + d4) << CONST_BITS;
1019  tmp1 = (d0 - d4) << CONST_BITS;
1020 
1021  tmp10 = tmp0 + tmp3;
1022  tmp13 = tmp0 - tmp3;
1023  tmp11 = tmp1 + tmp2;
1024  tmp12 = tmp1 - tmp2;
1025  }
1026  } else {
1027  if (d2) {
1028  /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1029  tmp2 = MULTIPLY(d2, FIX_0_541196100);
1030  tmp3 = MULTIPLY(d2, FIX_1_306562965);
1031 
1032  tmp0 = (d0 + d4) << CONST_BITS;
1033  tmp1 = (d0 - d4) << CONST_BITS;
1034 
1035  tmp10 = tmp0 + tmp3;
1036  tmp13 = tmp0 - tmp3;
1037  tmp11 = tmp1 + tmp2;
1038  tmp12 = tmp1 - tmp2;
1039  } else {
1040  /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1041  tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1042  tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1043  }
1044  }
1045 
1046  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1047 
1048  dataptr[0] = (int16_t) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
1049  dataptr[1] = (int16_t) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
1050  dataptr[2] = (int16_t) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
1051  dataptr[3] = (int16_t) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
1052 
1053  dataptr += DCTSTRIDE; /* advance pointer to next row */
1054  }
1055 
1056  /* Pass 2: process columns. */
1057  /* Note that we must descale the results by a factor of 8 == 2**3, */
1058  /* and also undo the PASS1_BITS scaling. */
1059 
1060  dataptr = data;
1061  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
1062  /* Columns of zeroes can be exploited in the same way as we did with rows.
1063  * However, the row calculation has created many nonzero AC terms, so the
1064  * simplification applies less often (typically 5% to 10% of the time).
1065  * On machines with very fast multiplication, it's possible that the
1066  * test takes more time than it's worth. In that case this section
1067  * may be commented out.
1068  */
1069 
1070  d0 = dataptr[DCTSTRIDE*0];
1071  d2 = dataptr[DCTSTRIDE*1];
1072  d4 = dataptr[DCTSTRIDE*2];
1073  d6 = dataptr[DCTSTRIDE*3];
1074 
1075  /* Even part: reverse the even part of the forward DCT. */
1076  /* The rotator is sqrt(2)*c(-6). */
1077  if (d6) {
1078  if (d2) {
1079  /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1080  z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1081  tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1082  tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1083 
1084  tmp0 = (d0 + d4) << CONST_BITS;
1085  tmp1 = (d0 - d4) << CONST_BITS;
1086 
1087  tmp10 = tmp0 + tmp3;
1088  tmp13 = tmp0 - tmp3;
1089  tmp11 = tmp1 + tmp2;
1090  tmp12 = tmp1 - tmp2;
1091  } else {
1092  /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1093  tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1094  tmp3 = MULTIPLY(d6, FIX_0_541196100);
1095 
1096  tmp0 = (d0 + d4) << CONST_BITS;
1097  tmp1 = (d0 - d4) << CONST_BITS;
1098 
1099  tmp10 = tmp0 + tmp3;
1100  tmp13 = tmp0 - tmp3;
1101  tmp11 = tmp1 + tmp2;
1102  tmp12 = tmp1 - tmp2;
1103  }
1104  } else {
1105  if (d2) {
1106  /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1107  tmp2 = MULTIPLY(d2, FIX_0_541196100);
1108  tmp3 = MULTIPLY(d2, FIX_1_306562965);
1109 
1110  tmp0 = (d0 + d4) << CONST_BITS;
1111  tmp1 = (d0 - d4) << CONST_BITS;
1112 
1113  tmp10 = tmp0 + tmp3;
1114  tmp13 = tmp0 - tmp3;
1115  tmp11 = tmp1 + tmp2;
1116  tmp12 = tmp1 - tmp2;
1117  } else {
1118  /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1119  tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1120  tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1121  }
1122  }
1123 
1124  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1125 
1126  dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
1127  dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
1128  dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
1129  dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
1130 
1131  dataptr++; /* advance pointer to next column */
1132  }
1133 }
1134 
1136  int d00, d01, d10, d11;
1137 
1138  data[0] += 4;
1139  d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE];
1140  d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
1141  d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
1142  d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
1143 
1144  data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
1145  data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
1146  data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
1147  data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
1148 }
1149 
1151  data[0] = (data[0] + 4)>>3;
1152 }
1153 
/* Undefine FIX and CONST_BITS so that these macros do not stay defined
 * for anything that follows this point. */
1154 #undef FIX
1155 #undef CONST_BITS
void ff_j_rev_dct1(DCTBLOCK data)
Definition: jrevdct.c:1150
#define FIX_0_899976223
Definition: jrevdct.c:190
float v
#define DCTSIZE
Definition: jrevdct.c:945
#define FIX_1_501321110
Definition: jrevdct.c:197
#define FIX_1_847759065
Definition: jrevdct.c:199
void ff_j_rev_dct4(DCTBLOCK data)
Definition: jrevdct.c:948
#define DESCALE(x, n)
Definition: jrevdct.c:150
#define FIX_0_541196100
Definition: jrevdct.c:186
#define FIX_0_765366865
Definition: jrevdct.c:188
#define FIX_2_562915447
Definition: jrevdct.c:203
#define FIX_2_172734803
Definition: jrevdct.c:202
int16_t DCTBLOCK[DCTSIZE2]
Definition: jrevdct.c:77
#define FIX_1_451774981
Definition: jrevdct.c:196
Spectrum Plot time data
#define DCTSIZE2
Definition: jrevdct.c:71
init variable d2
#define CONST_BITS
Definition: jrevdct.c:79
#define FIX_1_662939225
Definition: jrevdct.c:198
void ff_j_rev_dct2(DCTBLOCK data)
Definition: jrevdct.c:1135
int32_t
#define FIX_0_390180644
Definition: jrevdct.c:184
#define FIX_0_298631336
Definition: jrevdct.c:183
or the Software in violation of any applicable export control laws in any jurisdiction Except as provided by mandatorily applicable UPF has no obligation to provide you with source code to the Software In the event Software contains any source code
#define FIX_0_509795579
Definition: jrevdct.c:185
#define FIX_1_175875602
Definition: jrevdct.c:193
#define FIX_1_306562965
Definition: jrevdct.c:194
#define PASS1_BITS
Definition: jrevdct.c:127
void ff_j_rev_dct(DCTBLOCK data)
Definition: jrevdct.c:210
#define FIX_0_211164243
Definition: jrevdct.c:181
#define FIX_1_387039845
Definition: jrevdct.c:195
#define FIX_2_053119869
Definition: jrevdct.c:201
#define FIX_0_275899380
Definition: jrevdct.c:182
#define FIX_0_785694958
Definition: jrevdct.c:189
#define FIX_1_961570560
Definition: jrevdct.c:200
common internal and external API header
#define FIX_3_072711026
Definition: jrevdct.c:204
#define DCTSTRIDE
Definition: jrevdct.c:946
#define FIX_1_111140466
Definition: jrevdct.c:192
#define MULTIPLY(var, const)
Definition: jrevdct.c:173
#define FIX_0_601344887
Definition: jrevdct.c:187
#define FIX_1_061594337
Definition: jrevdct.c:191