jfdctfst.c
Go to the documentation of this file.
1 /*
2  * This file is part of the Independent JPEG Group's software.
3  *
4  * The authors make NO WARRANTY or representation, either express or implied,
5  * with respect to this software, its quality, accuracy, merchantability, or
6  * fitness for a particular purpose. This software is provided "AS IS", and
7  * you, its user, assume the entire risk as to its quality and accuracy.
8  *
9  * This software is copyright (C) 1994-1996, Thomas G. Lane.
10  * All Rights Reserved except as specified below.
11  *
12  * Permission is hereby granted to use, copy, modify, and distribute this
13  * software (or portions thereof) for any purpose, without fee, subject to
14  * these conditions:
15  * (1) If any part of the source code for this software is distributed, then
16  * this README file must be included, with this copyright and no-warranty
17  * notice unaltered; and any additions, deletions, or changes to the original
18  * files must be clearly indicated in accompanying documentation.
19  * (2) If only executable code is distributed, then the accompanying
20  * documentation must state that "this software is based in part on the work
21  * of the Independent JPEG Group".
22  * (3) Permission for use of this software is granted only if the user accepts
23  * full responsibility for any undesirable consequences; the authors accept
24  * NO LIABILITY for damages of any kind.
25  *
26  * These conditions apply to any software derived from or based on the IJG
27  * code, not just to the unmodified library. If you use our work, you ought
28  * to acknowledge us.
29  *
30  * Permission is NOT granted for the use of any IJG author's name or company
31  * name in advertising or publicity relating to this software or products
32  * derived from it. This software may be referred to only as "the Independent
33  * JPEG Group's software".
34  *
35  * We specifically permit and encourage the use of this software as the basis
36  * of commercial products, provided that all warranty or liability claims are
37  * assumed by the product vendor.
38  *
39  * This file contains a fast, not so accurate integer implementation of the
40  * forward DCT (Discrete Cosine Transform).
41  *
42  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
43  * on each column. Direct algorithms are also available, but they are
44  * much more complex and seem not to be any faster when reduced to code.
45  *
46  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
47  * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
48  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
49  * JPEG textbook (see REFERENCES section in file README). The following code
50  * is based directly on figure 4-8 in P&M.
51  * While an 8-point DCT cannot be done in less than 11 multiplies, it is
52  * possible to arrange the computation so that many of the multiplies are
53  * simple scalings of the final outputs. These multiplies can then be
54  * folded into the multiplications or divisions by the JPEG quantization
55  * table entries. The AA&N method leaves only 5 multiplies and 29 adds
56  * to be done in the DCT itself.
57  * The primary disadvantage of this method is that with fixed-point math,
58  * accuracy is lost due to imprecise representation of the scaled
59  * quantization values. The smaller the quantization table entry, the less
60  * precise the scaled value, so this implementation does worse with high-
61  * quality-setting files than with low-quality ones.
62  */
63 
64 /**
65  * @file
66  * Independent JPEG Group's fast AAN dct.
67  */
68 
69 #include <stdlib.h>
70 #include <stdio.h>
71 #include "libavutil/common.h"
72 #include "dct.h"
73 
74 #define DCTSIZE 8
75 #define GLOBAL(x) x
76 #define RIGHT_SHIFT(x, n) ((x) >> (n))
77 
78 /*
79  * This module is specialized to the case DCTSIZE = 8.
80  */
81 
82 #if DCTSIZE != 8
83  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
84 #endif
85 
86 
87 /* Scaling decisions are generally the same as in the LL&M algorithm;
88  * see jfdctint.c for more details. However, we choose to descale
89  * (right shift) multiplication products as soon as they are formed,
90  * rather than carrying additional fractional bits into subsequent additions.
91  * This compromises accuracy slightly, but it lets us save a few shifts.
92  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
93  * everywhere except in the multiplications proper; this saves a good deal
94  * of work on 16-bit-int machines.
95  *
96  * Again to save a few shifts, the intermediate results between pass 1 and
97  * pass 2 are not upscaled, but are represented only to integral precision.
98  *
99  * A final compromise is to represent the multiplicative constants to only
100  * 8 fractional bits, rather than 13. This saves some shifting work on some
101  * machines, and may also reduce the cost of multiplication (since there
102  * are fewer one-bits in the constants).
103  */
104 
105 #define CONST_BITS 8
106 
107 
108 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
109  * causing a lot of useless floating-point operations at run time.
110  * To get around this we use the following pre-calculated constants.
111  * If you change CONST_BITS you may want to add appropriate values.
112  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
113  */
114 
115 #if CONST_BITS == 8
116 #define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
117 #define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
118 #define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
119 #define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
120 #else
121 #define FIX_0_382683433 FIX(0.382683433)
122 #define FIX_0_541196100 FIX(0.541196100)
123 #define FIX_0_707106781 FIX(0.707106781)
124 #define FIX_1_306562965 FIX(1.306562965)
125 #endif
126 
127 
128 /* We can gain a little more speed, with a further compromise in accuracy,
129  * by omitting the addition in a descaling shift. This yields an incorrectly
130  * rounded result half the time...
131  */
132 
133 #ifndef USE_ACCURATE_ROUNDING
134 #undef DESCALE
135 #define DESCALE(x,n) RIGHT_SHIFT(x, n)
136 #endif
137 
138 
139 /* Multiply a int16_t variable by an int32_t constant, and immediately
140  * descale to yield a int16_t result.
141  */
142 
143 #define MULTIPLY(var,const) ((int16_t) DESCALE((var) * (const), CONST_BITS))
144 
145 static av_always_inline void row_fdct(int16_t * data){
146  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
147  int tmp10, tmp11, tmp12, tmp13;
148  int z1, z2, z3, z4, z5, z11, z13;
149  int16_t *dataptr;
150  int ctr;
151 
152  /* Pass 1: process rows. */
153 
154  dataptr = data;
155  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
156  tmp0 = dataptr[0] + dataptr[7];
157  tmp7 = dataptr[0] - dataptr[7];
158  tmp1 = dataptr[1] + dataptr[6];
159  tmp6 = dataptr[1] - dataptr[6];
160  tmp2 = dataptr[2] + dataptr[5];
161  tmp5 = dataptr[2] - dataptr[5];
162  tmp3 = dataptr[3] + dataptr[4];
163  tmp4 = dataptr[3] - dataptr[4];
164 
165  /* Even part */
166 
167  tmp10 = tmp0 + tmp3; /* phase 2 */
168  tmp13 = tmp0 - tmp3;
169  tmp11 = tmp1 + tmp2;
170  tmp12 = tmp1 - tmp2;
171 
172  dataptr[0] = tmp10 + tmp11; /* phase 3 */
173  dataptr[4] = tmp10 - tmp11;
174 
175  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
176  dataptr[2] = tmp13 + z1; /* phase 5 */
177  dataptr[6] = tmp13 - z1;
178 
179  /* Odd part */
180 
181  tmp10 = tmp4 + tmp5; /* phase 2 */
182  tmp11 = tmp5 + tmp6;
183  tmp12 = tmp6 + tmp7;
184 
185  /* The rotator is modified from fig 4-8 to avoid extra negations. */
186  z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
187  z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
188  z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
189  z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
190 
191  z11 = tmp7 + z3; /* phase 5 */
192  z13 = tmp7 - z3;
193 
194  dataptr[5] = z13 + z2; /* phase 6 */
195  dataptr[3] = z13 - z2;
196  dataptr[1] = z11 + z4;
197  dataptr[7] = z11 - z4;
198 
199  dataptr += DCTSIZE; /* advance pointer to next row */
200  }
201 }
202 
203 /*
204  * Perform the forward DCT on one block of samples.
205  */
206 
207 GLOBAL(void)
208 ff_fdct_ifast (int16_t * data)
209 {
210  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
211  int tmp10, tmp11, tmp12, tmp13;
212  int z1, z2, z3, z4, z5, z11, z13;
213  int16_t *dataptr;
214  int ctr;
215 
216  row_fdct(data);
217 
218  /* Pass 2: process columns. */
219 
220  dataptr = data;
221  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
222  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
223  tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
224  tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
225  tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
226  tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
227  tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
228  tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
229  tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
230 
231  /* Even part */
232 
233  tmp10 = tmp0 + tmp3; /* phase 2 */
234  tmp13 = tmp0 - tmp3;
235  tmp11 = tmp1 + tmp2;
236  tmp12 = tmp1 - tmp2;
237 
238  dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
239  dataptr[DCTSIZE*4] = tmp10 - tmp11;
240 
241  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
242  dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
243  dataptr[DCTSIZE*6] = tmp13 - z1;
244 
245  /* Odd part */
246 
247  tmp10 = tmp4 + tmp5; /* phase 2 */
248  tmp11 = tmp5 + tmp6;
249  tmp12 = tmp6 + tmp7;
250 
251  /* The rotator is modified from fig 4-8 to avoid extra negations. */
252  z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
253  z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
254  z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
255  z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
256 
257  z11 = tmp7 + z3; /* phase 5 */
258  z13 = tmp7 - z3;
259 
260  dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
261  dataptr[DCTSIZE*3] = z13 - z2;
262  dataptr[DCTSIZE*1] = z11 + z4;
263  dataptr[DCTSIZE*7] = z11 - z4;
264 
265  dataptr++; /* advance pointer to next column */
266  }
267 }
268 
269 /*
270  * Perform the forward 2-4-8 DCT on one block of samples.
271  */
272 
273 GLOBAL(void)
275 {
276  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
277  int tmp10, tmp11, tmp12, tmp13;
278  int z1;
279  int16_t *dataptr;
280  int ctr;
281 
282  row_fdct(data);
283 
284  /* Pass 2: process columns. */
285 
286  dataptr = data;
287  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
288  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
289  tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
290  tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
291  tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
292  tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
293  tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
294  tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
295  tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
296 
297  /* Even part */
298 
299  tmp10 = tmp0 + tmp3;
300  tmp11 = tmp1 + tmp2;
301  tmp12 = tmp1 - tmp2;
302  tmp13 = tmp0 - tmp3;
303 
304  dataptr[DCTSIZE*0] = tmp10 + tmp11;
305  dataptr[DCTSIZE*4] = tmp10 - tmp11;
306 
307  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
308  dataptr[DCTSIZE*2] = tmp13 + z1;
309  dataptr[DCTSIZE*6] = tmp13 - z1;
310 
311  tmp10 = tmp4 + tmp7;
312  tmp11 = tmp5 + tmp6;
313  tmp12 = tmp5 - tmp6;
314  tmp13 = tmp4 - tmp7;
315 
316  dataptr[DCTSIZE*1] = tmp10 + tmp11;
317  dataptr[DCTSIZE*5] = tmp10 - tmp11;
318 
319  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
320  dataptr[DCTSIZE*3] = tmp13 + z1;
321  dataptr[DCTSIZE*7] = tmp13 - z1;
322 
323  dataptr++; /* advance pointer to next column */
324  }
325 }
326 
327 
328 #undef GLOBAL
329 #undef CONST_BITS
330 #undef DESCALE
331 #undef FIX_0_541196100
332 #undef FIX_1_306562965
static av_always_inline void row_fdct(int16_t *data)
Definition: jfdctfst.c:145
#define GLOBAL(x)
Definition: jfdctfst.c:75
#define FIX_0_382683433
Definition: jfdctfst.c:116
Spectrum Plot time data
#define DCTSIZE
Definition: jfdctfst.c:74
#define FIX_1_306562965
Definition: jfdctfst.c:119
or the Software in violation of any applicable export control laws in any jurisdiction Except as provided by mandatorily applicable UPF has no obligation to provide you with source code to the Software In the event Software contains any source code
#define MULTIPLY(var, const)
Definition: jfdctfst.c:143
#define FIX_0_707106781
Definition: jfdctfst.c:118
ff_fdct_ifast(int16_t *data)
Definition: jfdctfst.c:208
common internal and external API header
#define FIX_0_541196100
Definition: jfdctfst.c:117
ff_fdct_ifast248(int16_t *data)
Definition: jfdctfst.c:274
#define av_always_inline
Definition: attributes.h:41