pmhd: ffmpeg/libavcodec/jfdctint

annotate ffmpeg/libavcodec/jfdctint_template.c @ 13:844d341cf643 tip

Back up before ISMIR

author	Yading Song <yading.song@eecs.qmul.ac.uk>
date	Thu, 31 Oct 2013 13:17:06 +0000
parents	6840f77b83aa
children

rev	line source
yading@10	1 /*
yading@10	2 * This file is part of the Independent JPEG Group's software.
yading@10	3 *
yading@10	4 * The authors make NO WARRANTY or representation, either express or implied,
yading@10	5 * with respect to this software, its quality, accuracy, merchantability, or
yading@10	6 * fitness for a particular purpose. This software is provided "AS IS", and
yading@10	7 * you, its user, assume the entire risk as to its quality and accuracy.
yading@10	8 *
yading@10	9 * This software is copyright (C) 1991-1996, Thomas G. Lane.
yading@10	10 * All Rights Reserved except as specified below.
yading@10	11 *
yading@10	12 * Permission is hereby granted to use, copy, modify, and distribute this
yading@10	13 * software (or portions thereof) for any purpose, without fee, subject to
yading@10	14 * these conditions:
yading@10	15 * (1) If any part of the source code for this software is distributed, then
yading@10	16 * this README file must be included, with this copyright and no-warranty
yading@10	17 * notice unaltered; and any additions, deletions, or changes to the original
yading@10	18 * files must be clearly indicated in accompanying documentation.
yading@10	19 * (2) If only executable code is distributed, then the accompanying
yading@10	20 * documentation must state that "this software is based in part on the work
yading@10	21 * of the Independent JPEG Group".
yading@10	22 * (3) Permission for use of this software is granted only if the user accepts
yading@10	23 * full responsibility for any undesirable consequences; the authors accept
yading@10	24 * NO LIABILITY for damages of any kind.
yading@10	25 *
yading@10	26 * These conditions apply to any software derived from or based on the IJG
yading@10	27 * code, not just to the unmodified library. If you use our work, you ought
yading@10	28 * to acknowledge us.
yading@10	29 *
yading@10	30 * Permission is NOT granted for the use of any IJG author's name or company
yading@10	31 * name in advertising or publicity relating to this software or products
yading@10	32 * derived from it. This software may be referred to only as "the Independent
yading@10	33 * JPEG Group's software".
yading@10	34 *
yading@10	35 * We specifically permit and encourage the use of this software as the basis
yading@10	36 * of commercial products, provided that all warranty or liability claims are
yading@10	37 * assumed by the product vendor.
yading@10	38 *
yading@10	39 * This file contains a slow-but-accurate integer implementation of the
yading@10	40 * forward DCT (Discrete Cosine Transform).
yading@10	41 *
yading@10	42 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
yading@10	43 * on each column. Direct algorithms are also available, but they are
yading@10	44 * much more complex and seem not to be any faster when reduced to code.
yading@10	45 *
yading@10	46 * This implementation is based on an algorithm described in
yading@10	47 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
yading@10	48 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
yading@10	49 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
yading@10	50 * The primary algorithm described there uses 11 multiplies and 29 adds.
yading@10	51 * We use their alternate method with 12 multiplies and 32 adds.
yading@10	52 * The advantage of this method is that no data path contains more than one
yading@10	53 * multiplication; this allows a very simple and accurate implementation in
yading@10	54 * scaled fixed-point arithmetic, with a minimal number of shifts.
yading@10	55 */
yading@10	56
yading@10	57 /**
yading@10	58 * @file
yading@10	59 * Independent JPEG Group's slow & accurate dct.
yading@10	60 */
yading@10	61
yading@10	62 #include "libavutil/common.h"
yading@10	63 #include "dct.h"
yading@10	64
yading@10	65 #include "bit_depth_template.c"
yading@10	66
/*
 * Block geometry and small arithmetic helpers used by the transforms below.
 *
 * DCTSIZE          edge length of the transformed block.
 * BITS_IN_JSAMPLE  sample precision; forwards to BIT_DEPTH, which is not
 *                  defined in this file (presumably supplied by
 *                  bit_depth_template.c or the including file -- confirm).
 * GLOBAL(x)        linkage wrapper kept from the IJG sources; expands to x.
 * RIGHT_SHIFT      plain >>; the result for negative x is whatever the
 *                  compiler does for signed right shifts.
 * MULTIPLY16C16    variable-times-constant product; here a plain multiply.
 */
#define DCTSIZE 8
#define BITS_IN_JSAMPLE BIT_DEPTH
#define GLOBAL(x) x
#define RIGHT_SHIFT(x, n) ((x) >> (n))
#define MULTIPLY16C16(var, coef) ((var) * (coef))

/*
 * DESCALE(x, n): drop n fractional bits from x.
 * The enabled branch adds 2^(n-1) before shifting so the result is rounded
 * instead of truncated; n must therefore be at least 1.
 */
#if 1 //def USE_ACCURATE_ROUNDING
#define DESCALE(x, n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
#else
#define DESCALE(x, n) RIGHT_SHIFT(x, n)
#endif


/*
 * This module is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
yading@10	87
yading@10	88
yading@10	89 /*
yading@10	90 * The poop on this scaling stuff is as follows:
yading@10	91 *
yading@10	92 * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
yading@10	93 * larger than the true DCT outputs. The final outputs are therefore
yading@10	94 * a factor of N larger than desired; since N=8 this can be cured by
yading@10	95 * a simple right shift at the end of the algorithm. The advantage of
yading@10	96 * this arrangement is that we save two multiplications per 1-D DCT,
yading@10	97 * because the y0 and y4 outputs need not be divided by sqrt(N).
yading@10	98 * In the IJG code, this factor of 8 is removed by the quantization step
yading@10	99 * (in jcdctmgr.c), NOT in this module.
yading@10	100 *
yading@10	101 * We have to do addition and subtraction of the integer inputs, which
yading@10	102 * is no problem, and multiplication by fractional constants, which is
yading@10	103 * a problem to do in integer arithmetic. We multiply all the constants
yading@10	104 * by CONST_SCALE and convert them to integer constants (thus retaining
yading@10	105 * CONST_BITS bits of precision in the constants). After doing a
yading@10	106 * multiplication we have to divide the product by CONST_SCALE, with proper
yading@10	107 * rounding, to produce the correct output. This division can be done
yading@10	108 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
yading@10	109 * as long as possible so that partial sums can be added together with
yading@10	110 * full fractional precision.
yading@10	111 *
yading@10	112 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
yading@10	113 * they are represented to better-than-integral precision. These outputs
yading@10	114 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
yading@10	115 * with the recommended scaling. (For 12-bit sample data, the intermediate
yading@10	116 * array is int32_t anyway.)
yading@10	117 *
yading@10	118 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
yading@10	119 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
yading@10	120 * shows that the values given below are the most effective.
yading@10	121 */
yading@10	122
/*
 * Fixed-point scaling knobs.
 *
 * CONST_BITS  fractional bits carried by the FIX_* multiplier constants.
 * PASS1_BITS  extra fractional bits the row pass leaves in its outputs.
 * OUT_SHIFT   bits removed by the final descale of the column pass.
 *
 * The values are cleared first so they can be redefined for the current
 * BITS_IN_JSAMPLE (presumably because this template is compiled more than
 * once with different bit depths -- confirm against the including file).
 */
#undef CONST_BITS
#undef PASS1_BITS
#undef OUT_SHIFT

#if BITS_IN_JSAMPLE == 8
#define CONST_BITS 13
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
#define OUT_SHIFT  PASS1_BITS
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#define OUT_SHIFT  (PASS1_BITS + 1)
#endif
yading@10	136
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 *
 * Each FIX_a_bbbbbbbbb below is the real number a.bbbbbbbbb multiplied by
 * 2^CONST_BITS and rounded to the nearest integer (e.g. 0.298631336 * 8192
 * is about 2446.4, stored as 2446).
 *
 * NOTE(review): the fallback branch uses FIX(), which is not defined in the
 * visible part of this file; it only compiles if FIX() is provided elsewhere.
 * With CONST_BITS fixed at 13 that branch is never selected.
 */

#if CONST_BITS == 13
#define FIX_0_298631336 ((int32_t)  2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int32_t)  3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int32_t)  4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int32_t)  6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int32_t)  7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int32_t)  9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_765366865 FIX(0.765366865)
#define FIX_0_899976223 FIX(0.899976223)
#define FIX_1_175875602 FIX(1.175875602)
#define FIX_1_501321110 FIX(1.501321110)
#define FIX_1_847759065 FIX(1.847759065)
#define FIX_1_961570560 FIX(1.961570560)
#define FIX_2_053119869 FIX(2.053119869)
#define FIX_2_562915447 FIX(2.562915447)
#define FIX_3_072711026 FIX(3.072711026)
#endif
yading@10	171
yading@10	172
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The first branch is taken only for 8-bit samples with PASS1_BITS <= 2 and
 * forwards to MULTIPLY16C16; every other configuration uses a plain product.
 */

#if BITS_IN_JSAMPLE == 8 && CONST_BITS <= 13 && PASS1_BITS <= 2
#define MULTIPLY(var, coef) MULTIPLY16C16(var, coef)
#else
#define MULTIPLY(var, coef) ((var) * (coef))
#endif
yading@10	185
yading@10	186
yading@10	187 static av_always_inline void FUNC(row_fdct)(int16_t *data)
yading@10	188 {
yading@10	189 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
yading@10	190 int tmp10, tmp11, tmp12, tmp13;
yading@10	191 int z1, z2, z3, z4, z5;
yading@10	192 int16_t *dataptr;
yading@10	193 int ctr;
yading@10	194
yading@10	195 /* Pass 1: process rows. */
yading@10	196 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
yading@10	197 /* furthermore, we scale the results by 2*PASS1_BITS. /
yading@10	198
yading@10	199 dataptr = data;
yading@10	200 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
yading@10	201 tmp0 = dataptr[0] + dataptr[7];
yading@10	202 tmp7 = dataptr[0] - dataptr[7];
yading@10	203 tmp1 = dataptr[1] + dataptr[6];
yading@10	204 tmp6 = dataptr[1] - dataptr[6];
yading@10	205 tmp2 = dataptr[2] + dataptr[5];
yading@10	206 tmp5 = dataptr[2] - dataptr[5];
yading@10	207 tmp3 = dataptr[3] + dataptr[4];
yading@10	208 tmp4 = dataptr[3] - dataptr[4];
yading@10	209
yading@10	210 /* Even part per LL&M figure 1 --- note that published figure is faulty;
yading@10	211 * rotator "sqrt(2)c1" should be "sqrt(2)c6".
yading@10	212 */
yading@10	213
yading@10	214 tmp10 = tmp0 + tmp3;
yading@10	215 tmp13 = tmp0 - tmp3;
yading@10	216 tmp11 = tmp1 + tmp2;
yading@10	217 tmp12 = tmp1 - tmp2;
yading@10	218
yading@10	219 dataptr[0] = (int16_t) ((tmp10 + tmp11) << PASS1_BITS);
yading@10	220 dataptr[4] = (int16_t) ((tmp10 - tmp11) << PASS1_BITS);
yading@10	221
yading@10	222 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
yading@10	223 dataptr[2] = (int16_t) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
yading@10	224 CONST_BITS-PASS1_BITS);
yading@10	225 dataptr[6] = (int16_t) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
yading@10	226 CONST_BITS-PASS1_BITS);
yading@10	227
yading@10	228 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
yading@10	229 * cK represents cos(K*pi/16).
yading@10	230 * i0..i3 in the paper are tmp4..tmp7 here.
yading@10	231 */
yading@10	232
yading@10	233 z1 = tmp4 + tmp7;
yading@10	234 z2 = tmp5 + tmp6;
yading@10	235 z3 = tmp4 + tmp6;
yading@10	236 z4 = tmp5 + tmp7;
yading@10	237 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
yading@10	238
yading@10	239 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
yading@10	240 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
yading@10	241 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
yading@10	242 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
yading@10	243 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
yading@10	244 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
yading@10	245 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
yading@10	246 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
yading@10	247
yading@10	248 z3 += z5;
yading@10	249 z4 += z5;
yading@10	250
yading@10	251 dataptr[7] = (int16_t) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
yading@10	252 dataptr[5] = (int16_t) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
yading@10	253 dataptr[3] = (int16_t) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
yading@10	254 dataptr[1] = (int16_t) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
yading@10	255
yading@10	256 dataptr += DCTSIZE; /* advance pointer to next row */
yading@10	257 }
yading@10	258 }
yading@10	259
yading@10	260 /*
yading@10	261 * Perform the forward DCT on one block of samples.
yading@10	262 */
yading@10	263
yading@10	264 GLOBAL(void)
yading@10	265 FUNC(ff_jpeg_fdct_islow)(int16_t *data)
yading@10	266 {
yading@10	267 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
yading@10	268 int tmp10, tmp11, tmp12, tmp13;
yading@10	269 int z1, z2, z3, z4, z5;
yading@10	270 int16_t *dataptr;
yading@10	271 int ctr;
yading@10	272
yading@10	273 FUNC(row_fdct)(data);
yading@10	274
yading@10	275 /* Pass 2: process columns.
yading@10	276 * We remove the PASS1_BITS scaling, but leave the results scaled up
yading@10	277 * by an overall factor of 8.
yading@10	278 */
yading@10	279
yading@10	280 dataptr = data;
yading@10	281 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
yading@10	282 tmp0 = dataptr[DCTSIZE0] + dataptr[DCTSIZE7];
yading@10	283 tmp7 = dataptr[DCTSIZE0] - dataptr[DCTSIZE7];
yading@10	284 tmp1 = dataptr[DCTSIZE1] + dataptr[DCTSIZE6];
yading@10	285 tmp6 = dataptr[DCTSIZE1] - dataptr[DCTSIZE6];
yading@10	286 tmp2 = dataptr[DCTSIZE2] + dataptr[DCTSIZE5];
yading@10	287 tmp5 = dataptr[DCTSIZE2] - dataptr[DCTSIZE5];
yading@10	288 tmp3 = dataptr[DCTSIZE3] + dataptr[DCTSIZE4];
yading@10	289 tmp4 = dataptr[DCTSIZE3] - dataptr[DCTSIZE4];
yading@10	290
yading@10	291 /* Even part per LL&M figure 1 --- note that published figure is faulty;
yading@10	292 * rotator "sqrt(2)c1" should be "sqrt(2)c6".
yading@10	293 */
yading@10	294
yading@10	295 tmp10 = tmp0 + tmp3;
yading@10	296 tmp13 = tmp0 - tmp3;
yading@10	297 tmp11 = tmp1 + tmp2;
yading@10	298 tmp12 = tmp1 - tmp2;
yading@10	299
yading@10	300 dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
yading@10	301 dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
yading@10	302
yading@10	303 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
yading@10	304 dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
yading@10	305 CONST_BITS + OUT_SHIFT);
yading@10	306 dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
yading@10	307 CONST_BITS + OUT_SHIFT);
yading@10	308
yading@10	309 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
yading@10	310 * cK represents cos(K*pi/16).
yading@10	311 * i0..i3 in the paper are tmp4..tmp7 here.
yading@10	312 */
yading@10	313
yading@10	314 z1 = tmp4 + tmp7;
yading@10	315 z2 = tmp5 + tmp6;
yading@10	316 z3 = tmp4 + tmp6;
yading@10	317 z4 = tmp5 + tmp7;
yading@10	318 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
yading@10	319
yading@10	320 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
yading@10	321 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
yading@10	322 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
yading@10	323 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
yading@10	324 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
yading@10	325 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
yading@10	326 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
yading@10	327 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
yading@10	328
yading@10	329 z3 += z5;
yading@10	330 z4 += z5;
yading@10	331
yading@10	332 dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
yading@10	333 dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
yading@10	334 dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
yading@10	335 dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
yading@10	336
yading@10	337 dataptr++; /* advance pointer to next column */
yading@10	338 }
yading@10	339 }
yading@10	340
yading@10	341 /*
yading@10	342 * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
yading@10	343 * on the rows and then, instead of doing even and odd, part on the columns
yading@10	344 * you do even part two times.
yading@10	345 */
yading@10	346 GLOBAL(void)
yading@10	347 FUNC(ff_fdct248_islow)(int16_t *data)
yading@10	348 {
yading@10	349 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
yading@10	350 int tmp10, tmp11, tmp12, tmp13;
yading@10	351 int z1;
yading@10	352 int16_t *dataptr;
yading@10	353 int ctr;
yading@10	354
yading@10	355 FUNC(row_fdct)(data);
yading@10	356
yading@10	357 /* Pass 2: process columns.
yading@10	358 * We remove the PASS1_BITS scaling, but leave the results scaled up
yading@10	359 * by an overall factor of 8.
yading@10	360 */
yading@10	361
yading@10	362 dataptr = data;
yading@10	363 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
yading@10	364 tmp0 = dataptr[DCTSIZE0] + dataptr[DCTSIZE1];
yading@10	365 tmp1 = dataptr[DCTSIZE2] + dataptr[DCTSIZE3];
yading@10	366 tmp2 = dataptr[DCTSIZE4] + dataptr[DCTSIZE5];
yading@10	367 tmp3 = dataptr[DCTSIZE6] + dataptr[DCTSIZE7];
yading@10	368 tmp4 = dataptr[DCTSIZE0] - dataptr[DCTSIZE1];
yading@10	369 tmp5 = dataptr[DCTSIZE2] - dataptr[DCTSIZE3];
yading@10	370 tmp6 = dataptr[DCTSIZE4] - dataptr[DCTSIZE5];
yading@10	371 tmp7 = dataptr[DCTSIZE6] - dataptr[DCTSIZE7];
yading@10	372
yading@10	373 tmp10 = tmp0 + tmp3;
yading@10	374 tmp11 = tmp1 + tmp2;
yading@10	375 tmp12 = tmp1 - tmp2;
yading@10	376 tmp13 = tmp0 - tmp3;
yading@10	377
yading@10	378 dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
yading@10	379 dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
yading@10	380
yading@10	381 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
yading@10	382 dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
yading@10	383 CONST_BITS+OUT_SHIFT);
yading@10	384 dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
yading@10	385 CONST_BITS+OUT_SHIFT);
yading@10	386
yading@10	387 tmp10 = tmp4 + tmp7;
yading@10	388 tmp11 = tmp5 + tmp6;
yading@10	389 tmp12 = tmp5 - tmp6;
yading@10	390 tmp13 = tmp4 - tmp7;
yading@10	391
yading@10	392 dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
yading@10	393 dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
yading@10	394
yading@10	395 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
yading@10	396 dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
yading@10	397 CONST_BITS + OUT_SHIFT);
yading@10	398 dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
yading@10	399 CONST_BITS + OUT_SHIFT);
yading@10	400
yading@10	401 dataptr++; /* advance pointer to next column */
yading@10	402 }
yading@10	403 }

Mercurial > hg > pmhd

annotate ffmpeg/libavcodec/jfdctint_template.c @ 13:844d341cf643 tip