Mercurial > hg > sv-dependency-builds
comparison src/libmad-0.15.1b/synth.c @ 85:545efbb81310
Import initial set of sources
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Mon, 18 Mar 2013 14:12:14 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 85:545efbb81310 |
---|---|
1 /* | |
2 * libmad - MPEG audio decoder library | |
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc. | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 * | |
19 * $Id: synth.c,v 1.25 2004/01/23 09:41:33 rob Exp $ | |
20 */ | |
21 | |
22 # ifdef HAVE_CONFIG_H | |
23 # include "config.h" | |
24 # endif | |
25 | |
26 # include "global.h" | |
27 | |
28 # include "fixed.h" | |
29 # include "frame.h" | |
30 # include "synth.h" | |
31 | |
32 /* | |
33 * NAME: synth->init() | |
34 * DESCRIPTION: initialize synth struct | |
35 */ | |
36 void mad_synth_init(struct mad_synth *synth) | |
37 { | |
38 mad_synth_mute(synth); | |
39 | |
40 synth->phase = 0; | |
41 | |
42 synth->pcm.samplerate = 0; | |
43 synth->pcm.channels = 0; | |
44 synth->pcm.length = 0; | |
45 } | |
46 | |
47 /* | |
48 * NAME: synth->mute() | |
49 * DESCRIPTION: zero all polyphase filterbank values, resetting synthesis | |
50 */ | |
51 void mad_synth_mute(struct mad_synth *synth) | |
52 { | |
53 unsigned int ch, s, v; | |
54 | |
55 for (ch = 0; ch < 2; ++ch) { | |
56 for (s = 0; s < 16; ++s) { | |
57 for (v = 0; v < 8; ++v) { | |
58 synth->filter[ch][0][0][s][v] = synth->filter[ch][0][1][s][v] = | |
59 synth->filter[ch][1][0][s][v] = synth->filter[ch][1][1][s][v] = 0; | |
60 } | |
61 } | |
62 } | |
63 } | |
64 | |
65 /* | |
66 * An optional optimization called here the Subband Synthesis Optimization | |
67 * (SSO) improves the performance of subband synthesis at the expense of | |
68 * accuracy. | |
69 * | |
70 * The idea is to simplify 32x32->64-bit multiplication to 32x32->32 such | |
71 * that extra scaling and rounding are not necessary. This often allows the | |
72 * compiler to use faster 32-bit multiply-accumulate instructions instead of | |
73 * explicit 64-bit multiply, shift, and add instructions. | |
74 * | |
75 * SSO works like this: a full 32x32->64-bit multiply of two mad_fixed_t | |
76 * values requires the result to be right-shifted 28 bits to be properly | |
77 * scaled to the same fixed-point format. Right shifts can be applied at any | |
78 * time to either operand or to the result, so the optimization involves | |
79 * careful placement of these shifts to minimize the loss of accuracy. | |
80 * | |
81 * First, a 14-bit shift is applied with rounding at compile-time to the D[] | |
82 * table of coefficients for the subband synthesis window. This only loses 2 | |
83 * bits of accuracy because the lower 12 bits are always zero. A second | |
84 * 12-bit shift occurs after the DCT calculation. This loses 12 bits of | |
85 * accuracy. Finally, a third 2-bit shift occurs just before the sample is | |
86 * saved in the PCM buffer. 14 + 12 + 2 == 28 bits. | |
87 */ | |
88 | |
89 /* FPM_DEFAULT without OPT_SSO will actually lose accuracy and performance,
 * so OPT_SSO is forced on whenever FPM_DEFAULT is selected */ | |
90 | |
91 # if defined(FPM_DEFAULT) && !defined(OPT_SSO) | |
92 # define OPT_SSO | |
93 # endif | |
94 | |
95 /* second SSO shift, with rounding: (1L << 11) is added before the 12-bit
 * right shift; without OPT_SSO, SHIFT() leaves its argument unchanged */ | |
96 | |
97 # if defined(OPT_SSO) | |
98 # define SHIFT(x) (((x) + (1L << 11)) >> 12) | |
99 # else | |
100 # define SHIFT(x) (x) | |
101 # endif | |
102 | |
103 /* possible DCT speed optimization
 *
 * When both OPT_SPEED and MAD_F_MLX are defined, OPT_DCTO is enabled and
 * MUL() becomes a ({ ... }) statement expression (a compiler extension)
 * that runs MAD_F_MLX() and keeps only the high product word, shifted left
 * by (32 - MAD_F_SCALEBITS - 3); the low word is discarded. The costab
 * constants that dct32() selects under OPT_DCTO are roughly 8x (2^3) the
 * default ones, which appears to be what the "- 3" accounts for.
 *
 * Otherwise OPT_DCTO is explicitly undefined and MUL() is plain mad_f_mul().
 */ | |
104 | |
105 # if defined(OPT_SPEED) && defined(MAD_F_MLX) | |
106 # define OPT_DCTO | |
107 # define MUL(x, y) \ | |
108 ({ mad_fixed64hi_t hi; \ | |
109 mad_fixed64lo_t lo; \ | |
110 MAD_F_MLX(hi, lo, (x), (y)); \ | |
111 hi << (32 - MAD_F_SCALEBITS - 3); \ | |
112 }) | |
113 # else | |
114 # undef OPT_DCTO | |
115 # define MUL(x, y) mad_f_mul((x), (y)) | |
116 # endif | |
117 | |
118 /* | |
119 * NAME: dct32() | |
120 * DESCRIPTION: perform fast in[32]->out[32] DCT | |
 *
 * PARAMETERS:
 *   in    32 input values; only read
 *   slot  column (second index) of lo[][] and hi[][] that receives the
 *         results; the arrays are declared [16][8], and the callers in this
 *         file pass (phase >> 1)
 *   lo    receives outputs 16..31: output k is stored in lo[k - 16][slot]
 *   hi    receives outputs 0..15:  output k is stored in hi[15 - k][slot]
 *
 * Every stored value goes through SHIFT(), i.e. the rounding 12-bit shift
 * when OPT_SSO is defined and no change otherwise. Products go through
 * MUL(), whose definition depends on OPT_DCTO.
121 */ | |
122 static | |
123 void dct32(mad_fixed_t const in[32], unsigned int slot, | |
124 mad_fixed_t lo[16][8], mad_fixed_t hi[16][8]) | |
125 { | |
/* temporaries for the fully unrolled butterfly network below */
126 mad_fixed_t t0, t1, t2, t3, t4, t5, t6, t7; | |
127 mad_fixed_t t8, t9, t10, t11, t12, t13, t14, t15; | |
128 mad_fixed_t t16, t17, t18, t19, t20, t21, t22, t23; | |
129 mad_fixed_t t24, t25, t26, t27, t28, t29, t30, t31; | |
130 mad_fixed_t t32, t33, t34, t35, t36, t37, t38, t39; | |
131 mad_fixed_t t40, t41, t42, t43, t44, t45, t46, t47; | |
132 mad_fixed_t t48, t49, t50, t51, t52, t53, t54, t55; | |
133 mad_fixed_t t56, t57, t58, t59, t60, t61, t62, t63; | |
134 mad_fixed_t t64, t65, t66, t67, t68, t69, t70, t71; | |
135 mad_fixed_t t72, t73, t74, t75, t76, t77, t78, t79; | |
136 mad_fixed_t t80, t81, t82, t83, t84, t85, t86, t87; | |
137 mad_fixed_t t88, t89, t90, t91, t92, t93, t94, t95; | |
138 mad_fixed_t t96, t97, t98, t99, t100, t101, t102, t103; | |
139 mad_fixed_t t104, t105, t106, t107, t108, t109, t110, t111; | |
140 mad_fixed_t t112, t113, t114, t115, t116, t117, t118, t119; | |
141 mad_fixed_t t120, t121, t122, t123, t124, t125, t126, t127; | |
142 mad_fixed_t t128, t129, t130, t131, t132, t133, t134, t135; | |
143 mad_fixed_t t136, t137, t138, t139, t140, t141, t142, t143; | |
144 mad_fixed_t t144, t145, t146, t147, t148, t149, t150, t151; | |
145 mad_fixed_t t152, t153, t154, t155, t156, t157, t158, t159; | |
146 mad_fixed_t t160, t161, t162, t163, t164, t165, t166, t167; | |
147 mad_fixed_t t168, t169, t170, t171, t172, t173, t174, t175; | |
148 mad_fixed_t t176; | |
149 | |
150 /* costab[i] = cos(PI / (2 * 32) * i) */ | |
/* Two encodings of the same table follow. The OPT_DCTO values are about 8x
 * (2^3) the default ones, matching the MUL() variant that keeps only the
 * high product word. The costabN macros are not #undef'd after this
 * function in the visible source. */
151 | |
152 # if defined(OPT_DCTO) | |
153 # define costab1 MAD_F(0x7fd8878e) | |
154 # define costab2 MAD_F(0x7f62368f) | |
155 # define costab3 MAD_F(0x7e9d55fc) | |
156 # define costab4 MAD_F(0x7d8a5f40) | |
157 # define costab5 MAD_F(0x7c29fbee) | |
158 # define costab6 MAD_F(0x7a7d055b) | |
159 # define costab7 MAD_F(0x78848414) | |
160 # define costab8 MAD_F(0x7641af3d) | |
161 # define costab9 MAD_F(0x73b5ebd1) | |
162 # define costab10 MAD_F(0x70e2cbc6) | |
163 # define costab11 MAD_F(0x6dca0d14) | |
164 # define costab12 MAD_F(0x6a6d98a4) | |
165 # define costab13 MAD_F(0x66cf8120) | |
166 # define costab14 MAD_F(0x62f201ac) | |
167 # define costab15 MAD_F(0x5ed77c8a) | |
168 # define costab16 MAD_F(0x5a82799a) | |
169 # define costab17 MAD_F(0x55f5a4d2) | |
170 # define costab18 MAD_F(0x5133cc94) | |
171 # define costab19 MAD_F(0x4c3fdff4) | |
172 # define costab20 MAD_F(0x471cece7) | |
173 # define costab21 MAD_F(0x41ce1e65) | |
174 # define costab22 MAD_F(0x3c56ba70) | |
175 # define costab23 MAD_F(0x36ba2014) | |
176 # define costab24 MAD_F(0x30fbc54d) | |
177 # define costab25 MAD_F(0x2b1f34eb) | |
178 # define costab26 MAD_F(0x25280c5e) | |
179 # define costab27 MAD_F(0x1f19f97b) | |
180 # define costab28 MAD_F(0x18f8b83c) | |
181 # define costab29 MAD_F(0x12c8106f) | |
182 # define costab30 MAD_F(0x0c8bd35e) | |
183 # define costab31 MAD_F(0x0647d97c) | |
184 # else | |
185 # define costab1 MAD_F(0x0ffb10f2) /* 0.998795456 */ | |
186 # define costab2 MAD_F(0x0fec46d2) /* 0.995184727 */ | |
187 # define costab3 MAD_F(0x0fd3aac0) /* 0.989176510 */ | |
188 # define costab4 MAD_F(0x0fb14be8) /* 0.980785280 */ | |
189 # define costab5 MAD_F(0x0f853f7e) /* 0.970031253 */ | |
190 # define costab6 MAD_F(0x0f4fa0ab) /* 0.956940336 */ | |
191 # define costab7 MAD_F(0x0f109082) /* 0.941544065 */ | |
192 # define costab8 MAD_F(0x0ec835e8) /* 0.923879533 */ | |
193 # define costab9 MAD_F(0x0e76bd7a) /* 0.903989293 */ | |
194 # define costab10 MAD_F(0x0e1c5979) /* 0.881921264 */ | |
195 # define costab11 MAD_F(0x0db941a3) /* 0.857728610 */ | |
196 # define costab12 MAD_F(0x0d4db315) /* 0.831469612 */ | |
197 # define costab13 MAD_F(0x0cd9f024) /* 0.803207531 */ | |
198 # define costab14 MAD_F(0x0c5e4036) /* 0.773010453 */ | |
199 # define costab15 MAD_F(0x0bdaef91) /* 0.740951125 */ | |
200 # define costab16 MAD_F(0x0b504f33) /* 0.707106781 */ | |
201 # define costab17 MAD_F(0x0abeb49a) /* 0.671558955 */ | |
202 # define costab18 MAD_F(0x0a267993) /* 0.634393284 */ | |
203 # define costab19 MAD_F(0x0987fbfe) /* 0.595699304 */ | |
204 # define costab20 MAD_F(0x08e39d9d) /* 0.555570233 */ | |
205 # define costab21 MAD_F(0x0839c3cd) /* 0.514102744 */ | |
206 # define costab22 MAD_F(0x078ad74e) /* 0.471396737 */ | |
207 # define costab23 MAD_F(0x06d74402) /* 0.427555093 */ | |
208 # define costab24 MAD_F(0x061f78aa) /* 0.382683432 */ | |
209 # define costab25 MAD_F(0x0563e69d) /* 0.336889853 */ | |
210 # define costab26 MAD_F(0x04a5018c) /* 0.290284677 */ | |
211 # define costab27 MAD_F(0x03e33f2f) /* 0.242980180 */ | |
212 # define costab28 MAD_F(0x031f1708) /* 0.195090322 */ | |
213 # define costab29 MAD_F(0x0259020e) /* 0.146730474 */ | |
214 # define costab30 MAD_F(0x01917a6c) /* 0.098017140 */ | |
215 # define costab31 MAD_F(0x00c8fb30) /* 0.049067674 */ | |
216 # endif | |
217 | |
/* First butterflies: in[i] is paired with in[31 - i]; the sum is kept as is
 * and the difference is weighted by a cosine. Each group of two pairs is
 * immediately combined into the next level (t33..t66). */
218 t0 = in[0] + in[31]; t16 = MUL(in[0] - in[31], costab1); | |
219 t1 = in[15] + in[16]; t17 = MUL(in[15] - in[16], costab31); | |
220 | |
221 t41 = t16 + t17; | |
222 t59 = MUL(t16 - t17, costab2); | |
223 t33 = t0 + t1; | |
224 t50 = MUL(t0 - t1, costab2); | |
225 | |
226 t2 = in[7] + in[24]; t18 = MUL(in[7] - in[24], costab15); | |
227 t3 = in[8] + in[23]; t19 = MUL(in[8] - in[23], costab17); | |
228 | |
229 t42 = t18 + t19; | |
230 t60 = MUL(t18 - t19, costab30); | |
231 t34 = t2 + t3; | |
232 t51 = MUL(t2 - t3, costab30); | |
233 | |
234 t4 = in[3] + in[28]; t20 = MUL(in[3] - in[28], costab7); | |
235 t5 = in[12] + in[19]; t21 = MUL(in[12] - in[19], costab25); | |
236 | |
237 t43 = t20 + t21; | |
238 t61 = MUL(t20 - t21, costab14); | |
239 t35 = t4 + t5; | |
240 t52 = MUL(t4 - t5, costab14); | |
241 | |
242 t6 = in[4] + in[27]; t22 = MUL(in[4] - in[27], costab9); | |
243 t7 = in[11] + in[20]; t23 = MUL(in[11] - in[20], costab23); | |
244 | |
245 t44 = t22 + t23; | |
246 t62 = MUL(t22 - t23, costab18); | |
247 t36 = t6 + t7; | |
248 t53 = MUL(t6 - t7, costab18); | |
249 | |
250 t8 = in[1] + in[30]; t24 = MUL(in[1] - in[30], costab3); | |
251 t9 = in[14] + in[17]; t25 = MUL(in[14] - in[17], costab29); | |
252 | |
253 t45 = t24 + t25; | |
254 t63 = MUL(t24 - t25, costab6); | |
255 t37 = t8 + t9; | |
256 t54 = MUL(t8 - t9, costab6); | |
257 | |
258 t10 = in[6] + in[25]; t26 = MUL(in[6] - in[25], costab13); | |
259 t11 = in[9] + in[22]; t27 = MUL(in[9] - in[22], costab19); | |
260 | |
261 t46 = t26 + t27; | |
262 t64 = MUL(t26 - t27, costab26); | |
263 t38 = t10 + t11; | |
264 t55 = MUL(t10 - t11, costab26); | |
265 | |
266 t12 = in[2] + in[29]; t28 = MUL(in[2] - in[29], costab5); | |
267 t13 = in[13] + in[18]; t29 = MUL(in[13] - in[18], costab27); | |
268 | |
269 t47 = t28 + t29; | |
270 t65 = MUL(t28 - t29, costab10); | |
271 t39 = t12 + t13; | |
272 t56 = MUL(t12 - t13, costab10); | |
273 | |
274 t14 = in[5] + in[26]; t30 = MUL(in[5] - in[26], costab11); | |
275 t15 = in[10] + in[21]; t31 = MUL(in[10] - in[21], costab21); | |
276 | |
277 t48 = t30 + t31; | |
278 t66 = MUL(t30 - t31, costab22); | |
279 t40 = t14 + t15; | |
280 t57 = MUL(t14 - t15, costab22); | |
281 | |
/* next level: sums and cosine-weighted differences of the values above */
282 t69 = t33 + t34; t89 = MUL(t33 - t34, costab4); | |
283 t70 = t35 + t36; t90 = MUL(t35 - t36, costab28); | |
284 t71 = t37 + t38; t91 = MUL(t37 - t38, costab12); | |
285 t72 = t39 + t40; t92 = MUL(t39 - t40, costab20); | |
286 t73 = t41 + t42; t94 = MUL(t41 - t42, costab4); | |
287 t74 = t43 + t44; t95 = MUL(t43 - t44, costab28); | |
288 t75 = t45 + t46; t96 = MUL(t45 - t46, costab12); | |
289 t76 = t47 + t48; t97 = MUL(t47 - t48, costab20); | |
290 | |
291 t78 = t50 + t51; t100 = MUL(t50 - t51, costab4); | |
292 t79 = t52 + t53; t101 = MUL(t52 - t53, costab28); | |
293 t80 = t54 + t55; t102 = MUL(t54 - t55, costab12); | |
294 t81 = t56 + t57; t103 = MUL(t56 - t57, costab20); | |
295 | |
296 t83 = t59 + t60; t106 = MUL(t59 - t60, costab4); | |
297 t84 = t61 + t62; t107 = MUL(t61 - t62, costab28); | |
298 t85 = t63 + t64; t108 = MUL(t63 - t64, costab12); | |
299 t86 = t65 + t66; t109 = MUL(t65 - t66, costab20); | |
300 | |
301 t113 = t69 + t70; | |
302 t114 = t71 + t72; | |
303 | |
/* From here on results are stored. The number in each leading comment is
 * the DCT output index k: outputs 0..15 land in hi[15 - k][slot], outputs
 * 16..31 in lo[k - 16][slot]. Many outputs reuse earlier ones through the
 * "(x * 2) - y" recurrences, so statement order matters. */
304 /* 0 */ hi[15][slot] = SHIFT(t113 + t114); | |
305 /* 16 */ lo[ 0][slot] = SHIFT(MUL(t113 - t114, costab16)); | |
306 | |
307 t115 = t73 + t74; | |
308 t116 = t75 + t76; | |
309 | |
310 t32 = t115 + t116; | |
311 | |
312 /* 1 */ hi[14][slot] = SHIFT(t32); | |
313 | |
314 t118 = t78 + t79; | |
315 t119 = t80 + t81; | |
316 | |
317 t58 = t118 + t119; | |
318 | |
319 /* 2 */ hi[13][slot] = SHIFT(t58); | |
320 | |
321 t121 = t83 + t84; | |
322 t122 = t85 + t86; | |
323 | |
324 t67 = t121 + t122; | |
325 | |
326 t49 = (t67 * 2) - t32; | |
327 | |
328 /* 3 */ hi[12][slot] = SHIFT(t49); | |
329 | |
330 t125 = t89 + t90; | |
331 t126 = t91 + t92; | |
332 | |
333 t93 = t125 + t126; | |
334 | |
335 /* 4 */ hi[11][slot] = SHIFT(t93); | |
336 | |
337 t128 = t94 + t95; | |
338 t129 = t96 + t97; | |
339 | |
340 t98 = t128 + t129; | |
341 | |
342 t68 = (t98 * 2) - t49; | |
343 | |
344 /* 5 */ hi[10][slot] = SHIFT(t68); | |
345 | |
346 t132 = t100 + t101; | |
347 t133 = t102 + t103; | |
348 | |
349 t104 = t132 + t133; | |
350 | |
351 t82 = (t104 * 2) - t58; | |
352 | |
353 /* 6 */ hi[ 9][slot] = SHIFT(t82); | |
354 | |
355 t136 = t106 + t107; | |
356 t137 = t108 + t109; | |
357 | |
358 t110 = t136 + t137; | |
359 | |
360 t87 = (t110 * 2) - t67; | |
361 | |
362 t77 = (t87 * 2) - t68; | |
363 | |
364 /* 7 */ hi[ 8][slot] = SHIFT(t77); | |
365 | |
366 t141 = MUL(t69 - t70, costab8); | |
367 t142 = MUL(t71 - t72, costab24); | |
368 t143 = t141 + t142; | |
369 | |
370 /* 8 */ hi[ 7][slot] = SHIFT(t143); | |
371 /* 24 */ lo[ 8][slot] = | |
372 SHIFT((MUL(t141 - t142, costab16) * 2) - t143); | |
373 | |
374 t144 = MUL(t73 - t74, costab8); | |
375 t145 = MUL(t75 - t76, costab24); | |
376 t146 = t144 + t145; | |
377 | |
378 t88 = (t146 * 2) - t77; | |
379 | |
380 /* 9 */ hi[ 6][slot] = SHIFT(t88); | |
381 | |
382 t148 = MUL(t78 - t79, costab8); | |
383 t149 = MUL(t80 - t81, costab24); | |
384 t150 = t148 + t149; | |
385 | |
386 t105 = (t150 * 2) - t82; | |
387 | |
388 /* 10 */ hi[ 5][slot] = SHIFT(t105); | |
389 | |
390 t152 = MUL(t83 - t84, costab8); | |
391 t153 = MUL(t85 - t86, costab24); | |
392 t154 = t152 + t153; | |
393 | |
394 t111 = (t154 * 2) - t87; | |
395 | |
396 t99 = (t111 * 2) - t88; | |
397 | |
398 /* 11 */ hi[ 4][slot] = SHIFT(t99); | |
399 | |
400 t157 = MUL(t89 - t90, costab8); | |
401 t158 = MUL(t91 - t92, costab24); | |
402 t159 = t157 + t158; | |
403 | |
404 t127 = (t159 * 2) - t93; | |
405 | |
406 /* 12 */ hi[ 3][slot] = SHIFT(t127); | |
407 | |
408 t160 = (MUL(t125 - t126, costab16) * 2) - t127; | |
409 | |
410 /* 20 */ lo[ 4][slot] = SHIFT(t160); | |
411 /* 28 */ lo[12][slot] = | |
412 SHIFT((((MUL(t157 - t158, costab16) * 2) - t159) * 2) - t160); | |
413 | |
414 t161 = MUL(t94 - t95, costab8); | |
415 t162 = MUL(t96 - t97, costab24); | |
416 t163 = t161 + t162; | |
417 | |
418 t130 = (t163 * 2) - t98; | |
419 | |
420 t112 = (t130 * 2) - t99; | |
421 | |
422 /* 13 */ hi[ 2][slot] = SHIFT(t112); | |
423 | |
424 t164 = (MUL(t128 - t129, costab16) * 2) - t130; | |
425 | |
426 t166 = MUL(t100 - t101, costab8); | |
427 t167 = MUL(t102 - t103, costab24); | |
428 t168 = t166 + t167; | |
429 | |
430 t134 = (t168 * 2) - t104; | |
431 | |
432 t120 = (t134 * 2) - t105; | |
433 | |
434 /* 14 */ hi[ 1][slot] = SHIFT(t120); | |
435 | |
436 t135 = (MUL(t118 - t119, costab16) * 2) - t120; | |
437 | |
438 /* 18 */ lo[ 2][slot] = SHIFT(t135); | |
439 | |
440 t169 = (MUL(t132 - t133, costab16) * 2) - t134; | |
441 | |
442 t151 = (t169 * 2) - t135; | |
443 | |
444 /* 22 */ lo[ 6][slot] = SHIFT(t151); | |
445 | |
446 t170 = (((MUL(t148 - t149, costab16) * 2) - t150) * 2) - t151; | |
447 | |
448 /* 26 */ lo[10][slot] = SHIFT(t170); | |
449 /* 30 */ lo[14][slot] = | |
450 SHIFT((((((MUL(t166 - t167, costab16) * 2) - | |
451 t168) * 2) - t169) * 2) - t170); | |
452 | |
453 t171 = MUL(t106 - t107, costab8); | |
454 t172 = MUL(t108 - t109, costab24); | |
455 t173 = t171 + t172; | |
456 | |
457 t138 = (t173 * 2) - t110; | |
458 | |
459 t123 = (t138 * 2) - t111; | |
460 | |
461 t139 = (MUL(t121 - t122, costab16) * 2) - t123; | |
462 | |
463 t117 = (t123 * 2) - t112; | |
464 | |
465 /* 15 */ hi[ 0][slot] = SHIFT(t117); | |
466 | |
467 t124 = (MUL(t115 - t116, costab16) * 2) - t117; | |
468 | |
469 /* 17 */ lo[ 1][slot] = SHIFT(t124); | |
470 | |
471 t131 = (t139 * 2) - t124; | |
472 | |
473 /* 19 */ lo[ 3][slot] = SHIFT(t131); | |
474 | |
475 t140 = (t164 * 2) - t131; | |
476 | |
477 /* 21 */ lo[ 5][slot] = SHIFT(t140); | |
478 | |
479 t174 = (MUL(t136 - t137, costab16) * 2) - t138; | |
480 | |
481 t155 = (t174 * 2) - t139; | |
482 | |
483 t147 = (t155 * 2) - t140; | |
484 | |
485 /* 23 */ lo[ 7][slot] = SHIFT(t147); | |
486 | |
487 t156 = (((MUL(t144 - t145, costab16) * 2) - t146) * 2) - t147; | |
488 | |
489 /* 25 */ lo[ 9][slot] = SHIFT(t156); | |
490 | |
491 t175 = (((MUL(t152 - t153, costab16) * 2) - t154) * 2) - t155; | |
492 | |
493 t165 = (t175 * 2) - t156; | |
494 | |
495 /* 27 */ lo[11][slot] = SHIFT(t165); | |
496 | |
497 t176 = (((((MUL(t161 - t162, costab16) * 2) - | |
498 t163) * 2) - t164) * 2) - t165; | |
499 | |
500 /* 29 */ lo[13][slot] = SHIFT(t176); | |
501 /* 31 */ lo[15][slot] = | |
502 SHIFT((((((((MUL(t171 - t172, costab16) * 2) - | |
503 t173) * 2) - t174) * 2) - t175) * 2) - t176); | |
504 | |
505 /* | |
506 * Totals: | |
507 * 80 multiplies | |
508 * 80 additions | |
509 * 119 subtractions | |
510 * 49 shifts (not counting SSO) | |
511 */ | |
512 } | |
513 | |
/* MUL() and SHIFT() as used by dct32() end here; SHIFT() is redefined below
 * with a different meaning for the windowing code */
514 # undef MUL | |
515 # undef SHIFT | |
516 | |
517 /* third SSO shift and/or D[] optimization preshift
 *
 * OPT_SSO branch: ML0/MLA/MLN/MLZ work on the `lo' accumulator only, using
 * plain integer multiply and add (MLZ merely evaluates `hi' as a void
 * expression); SHIFT() drops 2 bits and PRESHIFT() applies a rounded 14-bit
 * right shift to a MAD_F() constant. The build is rejected unless
 * MAD_F_FRACBITS is 28.
 *
 * Other branch: the ML* macros forward to the MAD_F_ML* macros and SHIFT()
 * is the identity. If MAD_F_SCALEBITS is defined it is redefined as
 * (MAD_F_FRACBITS - 12) and PRESHIFT() shifts the constant right by 12;
 * otherwise PRESHIFT() is just MAD_F().
 */ | |
518 | |
519 # if defined(OPT_SSO) | |
520 # if MAD_F_FRACBITS != 28 | |
521 # error "MAD_F_FRACBITS must be 28 to use OPT_SSO" | |
522 # endif | |
523 # define ML0(hi, lo, x, y) ((lo) = (x) * (y)) | |
524 # define MLA(hi, lo, x, y) ((lo) += (x) * (y)) | |
525 # define MLN(hi, lo) ((lo) = -(lo)) | |
526 # define MLZ(hi, lo) ((void) (hi), (mad_fixed_t) (lo)) | |
527 # define SHIFT(x) ((x) >> 2) | |
528 # define PRESHIFT(x) ((MAD_F(x) + (1L << 13)) >> 14) | |
529 # else | |
530 # define ML0(hi, lo, x, y) MAD_F_ML0((hi), (lo), (x), (y)) | |
531 # define MLA(hi, lo, x, y) MAD_F_MLA((hi), (lo), (x), (y)) | |
532 # define MLN(hi, lo) MAD_F_MLN((hi), (lo)) | |
533 # define MLZ(hi, lo) MAD_F_MLZ((hi), (lo)) | |
534 # define SHIFT(x) (x) | |
535 # if defined(MAD_F_SCALEBITS) | |
536 # undef MAD_F_SCALEBITS | |
537 # define MAD_F_SCALEBITS (MAD_F_FRACBITS - 12) | |
538 # define PRESHIFT(x) (MAD_F(x) >> 12) | |
539 # else | |
540 # define PRESHIFT(x) MAD_F(x) | |
541 # endif | |
542 # endif | |
543 | |
/* 17 x 32 window coefficient table; the initializers are pulled in from
 * D.dat (presumably written in terms of PRESHIFT() -- confirm in D.dat) */
544 static | |
545 mad_fixed_t const D[17][32] = { | |
546 # include "D.dat" | |
547 }; | |
548 | |
/* With ASO_SYNTH defined only a prototype for synth_full() is given here;
 * the definition is presumably provided elsewhere (not visible here). */
549 # if defined(ASO_SYNTH) | |
550 void synth_full(struct mad_synth *, struct mad_frame const *, | |
551 unsigned int, unsigned int); | |
552 # else | |
553 /* | |
554 * NAME: synth->full() | |
555 * DESCRIPTION: perform full frequency PCM synthesis | |
 *
 * For each of the first nch channels and each of ns blocks of 32 subband
 * samples in frame->sbsample[ch], runs dct32() into the channel's filter
 * banks and then writes 32 windowed output samples to
 * synth->pcm.samples[ch]. The phase is tracked in a local copy that starts
 * from synth->phase for every channel; synth->phase itself is not modified
 * here (mad_synth_frame() advances it after the call).
556 */ | |
557 static | |
558 void synth_full(struct mad_synth *synth, struct mad_frame const *frame, | |
559 unsigned int nch, unsigned int ns) | |
560 { | |
561 unsigned int phase, ch, s, sb, pe, po; | |
562 mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; | |
563 mad_fixed_t const (*sbsample)[36][32]; | |
564 register mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; | |
565 register mad_fixed_t const (*Dptr)[32], *ptr; | |
566 register mad_fixed64hi_t hi; | |
567 register mad_fixed64lo_t lo; | |
568 | |
569 for (ch = 0; ch < nch; ++ch) { | |
570 sbsample = &frame->sbsample[ch]; | |
571 filter = &synth->filter[ch]; | |
572 phase = synth->phase; | |
573 pcm1 = synth->pcm.samples[ch]; | |
574 | |
575 for (s = 0; s < ns; ++s) { | |
/* new DCT results go into column (phase >> 1) of the two banks selected by
 * the parity of phase: (*filter)[0][..] as lo, (*filter)[1][..] as hi */
576 dct32((*sbsample)[s], phase >> 1, | |
577 (*filter)[0][phase & 1], (*filter)[1][phase & 1]); | |
578 | |
/* pe: phase with bit 0 cleared (even offset into a D row);
 * po: ((phase - 1) mod 16) with bit 0 set (odd offset into a D row) */
579 pe = phase & ~1; | |
580 po = ((phase - 1) & 0xf) | 1; | |
581 | |
582 /* calculate 32 samples */ | |
583 | |
/* fe: the lo bank just written by dct32(); fx and fo: the lo and hi banks
 * of the opposite parity */
584 fe = &(*filter)[0][ phase & 1][0]; | |
585 fx = &(*filter)[0][~phase & 1][0]; | |
586 fo = &(*filter)[1][~phase & 1][0]; | |
587 | |
588 Dptr = &D[0]; | |
589 | |
/* sample 0 of the block: negated fx sum plus fe sum, both against D[0] */
590 ptr = *Dptr + po; | |
591 ML0(hi, lo, (*fx)[0], ptr[ 0]); | |
592 MLA(hi, lo, (*fx)[1], ptr[14]); | |
593 MLA(hi, lo, (*fx)[2], ptr[12]); | |
594 MLA(hi, lo, (*fx)[3], ptr[10]); | |
595 MLA(hi, lo, (*fx)[4], ptr[ 8]); | |
596 MLA(hi, lo, (*fx)[5], ptr[ 6]); | |
597 MLA(hi, lo, (*fx)[6], ptr[ 4]); | |
598 MLA(hi, lo, (*fx)[7], ptr[ 2]); | |
599 MLN(hi, lo); | |
600 | |
601 ptr = *Dptr + pe; | |
602 MLA(hi, lo, (*fe)[0], ptr[ 0]); | |
603 MLA(hi, lo, (*fe)[1], ptr[14]); | |
604 MLA(hi, lo, (*fe)[2], ptr[12]); | |
605 MLA(hi, lo, (*fe)[3], ptr[10]); | |
606 MLA(hi, lo, (*fe)[4], ptr[ 8]); | |
607 MLA(hi, lo, (*fe)[5], ptr[ 6]); | |
608 MLA(hi, lo, (*fe)[6], ptr[ 4]); | |
609 MLA(hi, lo, (*fe)[7], ptr[ 2]); | |
610 | |
611 *pcm1++ = SHIFT(MLZ(hi, lo)); | |
612 | |
/* pcm1 now points at sample 1; pcm2 starts at sample 31 and walks backward,
 * so each pass of the loop below stores samples sb and 32 - sb */
613 pcm2 = pcm1 + 30; | |
614 | |
615 for (sb = 1; sb < 16; ++sb) { | |
616 ++fe; | |
617 ++Dptr; | |
618 | |
619 /* D[32 - sb][i] == -D[sb][31 - i] */ | |
620 | |
621 ptr = *Dptr + po; | |
622 ML0(hi, lo, (*fo)[0], ptr[ 0]); | |
623 MLA(hi, lo, (*fo)[1], ptr[14]); | |
624 MLA(hi, lo, (*fo)[2], ptr[12]); | |
625 MLA(hi, lo, (*fo)[3], ptr[10]); | |
626 MLA(hi, lo, (*fo)[4], ptr[ 8]); | |
627 MLA(hi, lo, (*fo)[5], ptr[ 6]); | |
628 MLA(hi, lo, (*fo)[6], ptr[ 4]); | |
629 MLA(hi, lo, (*fo)[7], ptr[ 2]); | |
630 MLN(hi, lo); | |
631 | |
632 ptr = *Dptr + pe; | |
633 MLA(hi, lo, (*fe)[7], ptr[ 2]); | |
634 MLA(hi, lo, (*fe)[6], ptr[ 4]); | |
635 MLA(hi, lo, (*fe)[5], ptr[ 6]); | |
636 MLA(hi, lo, (*fe)[4], ptr[ 8]); | |
637 MLA(hi, lo, (*fe)[3], ptr[10]); | |
638 MLA(hi, lo, (*fe)[2], ptr[12]); | |
639 MLA(hi, lo, (*fe)[1], ptr[14]); | |
640 MLA(hi, lo, (*fe)[0], ptr[ 0]); | |
641 | |
642 *pcm1++ = SHIFT(MLZ(hi, lo)); | |
643 | |
/* mirrored sample: the same D row is read from its far end (31 - i) */
644 ptr = *Dptr - pe; | |
645 ML0(hi, lo, (*fe)[0], ptr[31 - 16]); | |
646 MLA(hi, lo, (*fe)[1], ptr[31 - 14]); | |
647 MLA(hi, lo, (*fe)[2], ptr[31 - 12]); | |
648 MLA(hi, lo, (*fe)[3], ptr[31 - 10]); | |
649 MLA(hi, lo, (*fe)[4], ptr[31 - 8]); | |
650 MLA(hi, lo, (*fe)[5], ptr[31 - 6]); | |
651 MLA(hi, lo, (*fe)[6], ptr[31 - 4]); | |
652 MLA(hi, lo, (*fe)[7], ptr[31 - 2]); | |
653 | |
654 ptr = *Dptr - po; | |
655 MLA(hi, lo, (*fo)[7], ptr[31 - 2]); | |
656 MLA(hi, lo, (*fo)[6], ptr[31 - 4]); | |
657 MLA(hi, lo, (*fo)[5], ptr[31 - 6]); | |
658 MLA(hi, lo, (*fo)[4], ptr[31 - 8]); | |
659 MLA(hi, lo, (*fo)[3], ptr[31 - 10]); | |
660 MLA(hi, lo, (*fo)[2], ptr[31 - 12]); | |
661 MLA(hi, lo, (*fo)[1], ptr[31 - 14]); | |
662 MLA(hi, lo, (*fo)[0], ptr[31 - 16]); | |
663 | |
664 *pcm2-- = SHIFT(MLZ(hi, lo)); | |
665 | |
666 ++fo; | |
667 } | |
668 | |
/* Dptr moves on to the last row, D[16], for the middle sample (index 16) */
669 ++Dptr; | |
670 | |
671 ptr = *Dptr + po; | |
672 ML0(hi, lo, (*fo)[0], ptr[ 0]); | |
673 MLA(hi, lo, (*fo)[1], ptr[14]); | |
674 MLA(hi, lo, (*fo)[2], ptr[12]); | |
675 MLA(hi, lo, (*fo)[3], ptr[10]); | |
676 MLA(hi, lo, (*fo)[4], ptr[ 8]); | |
677 MLA(hi, lo, (*fo)[5], ptr[ 6]); | |
678 MLA(hi, lo, (*fo)[6], ptr[ 4]); | |
679 MLA(hi, lo, (*fo)[7], ptr[ 2]); | |
680 | |
/* store sample 16, then step over samples 17..31 (already written through
 * pcm2) to the start of the next 32-sample block */
681 *pcm1 = SHIFT(-MLZ(hi, lo)); | |
682 pcm1 += 16; | |
683 | |
684 phase = (phase + 1) % 16; | |
685 } | |
686 } | |
687 } | |
688 # endif | |
689 | |
690 /* | |
691 * NAME: synth->half() | |
692 * DESCRIPTION: perform half frequency PCM synthesis | |
 *
 * Same structure as synth_full(), but only 16 output samples are written to
 * synth->pcm.samples[ch] per block of subband samples: inside the sb loop
 * only even values of sb produce output. The phase is tracked in a local
 * copy that starts from synth->phase for every channel; synth->phase itself
 * is not modified here.
693 */ | |
694 static | |
695 void synth_half(struct mad_synth *synth, struct mad_frame const *frame, | |
696 unsigned int nch, unsigned int ns) | |
697 { | |
698 unsigned int phase, ch, s, sb, pe, po; | |
699 mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; | |
700 mad_fixed_t const (*sbsample)[36][32]; | |
701 register mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; | |
702 register mad_fixed_t const (*Dptr)[32], *ptr; | |
703 register mad_fixed64hi_t hi; | |
704 register mad_fixed64lo_t lo; | |
705 | |
706 for (ch = 0; ch < nch; ++ch) { | |
707 sbsample = &frame->sbsample[ch]; | |
708 filter = &synth->filter[ch]; | |
709 phase = synth->phase; | |
710 pcm1 = synth->pcm.samples[ch]; | |
711 | |
712 for (s = 0; s < ns; ++s) { | |
/* new DCT results go into column (phase >> 1) of the two banks selected by
 * the parity of phase */
713 dct32((*sbsample)[s], phase >> 1, | |
714 (*filter)[0][phase & 1], (*filter)[1][phase & 1]); | |
715 | |
/* pe: phase with bit 0 cleared; po: ((phase - 1) mod 16) with bit 0 set */
716 pe = phase & ~1; | |
717 po = ((phase - 1) & 0xf) | 1; | |
718 | |
719 /* calculate 16 samples */ | |
720 | |
/* fe: the lo bank just written by dct32(); fx and fo: the lo and hi banks
 * of the opposite parity */
721 fe = &(*filter)[0][ phase & 1][0]; | |
722 fx = &(*filter)[0][~phase & 1][0]; | |
723 fo = &(*filter)[1][~phase & 1][0]; | |
724 | |
725 Dptr = &D[0]; | |
726 | |
727 ptr = *Dptr + po; | |
728 ML0(hi, lo, (*fx)[0], ptr[ 0]); | |
729 MLA(hi, lo, (*fx)[1], ptr[14]); | |
730 MLA(hi, lo, (*fx)[2], ptr[12]); | |
731 MLA(hi, lo, (*fx)[3], ptr[10]); | |
732 MLA(hi, lo, (*fx)[4], ptr[ 8]); | |
733 MLA(hi, lo, (*fx)[5], ptr[ 6]); | |
734 MLA(hi, lo, (*fx)[6], ptr[ 4]); | |
735 MLA(hi, lo, (*fx)[7], ptr[ 2]); | |
736 MLN(hi, lo); | |
737 | |
738 ptr = *Dptr + pe; | |
739 MLA(hi, lo, (*fe)[0], ptr[ 0]); | |
740 MLA(hi, lo, (*fe)[1], ptr[14]); | |
741 MLA(hi, lo, (*fe)[2], ptr[12]); | |
742 MLA(hi, lo, (*fe)[3], ptr[10]); | |
743 MLA(hi, lo, (*fe)[4], ptr[ 8]); | |
744 MLA(hi, lo, (*fe)[5], ptr[ 6]); | |
745 MLA(hi, lo, (*fe)[6], ptr[ 4]); | |
746 MLA(hi, lo, (*fe)[7], ptr[ 2]); | |
747 | |
748 *pcm1++ = SHIFT(MLZ(hi, lo)); | |
749 | |
/* pcm1 now points at sample 1; pcm2 starts at sample 15 and walks backward */
750 pcm2 = pcm1 + 14; | |
751 | |
752 for (sb = 1; sb < 16; ++sb) { | |
/* fe, Dptr and fo advance on every pass, but output is produced only when
 * sb is even */
753 ++fe; | |
754 ++Dptr; | |
755 | |
756 /* D[32 - sb][i] == -D[sb][31 - i] */ | |
757 | |
758 if (!(sb & 1)) { | |
759 ptr = *Dptr + po; | |
760 ML0(hi, lo, (*fo)[0], ptr[ 0]); | |
761 MLA(hi, lo, (*fo)[1], ptr[14]); | |
762 MLA(hi, lo, (*fo)[2], ptr[12]); | |
763 MLA(hi, lo, (*fo)[3], ptr[10]); | |
764 MLA(hi, lo, (*fo)[4], ptr[ 8]); | |
765 MLA(hi, lo, (*fo)[5], ptr[ 6]); | |
766 MLA(hi, lo, (*fo)[6], ptr[ 4]); | |
767 MLA(hi, lo, (*fo)[7], ptr[ 2]); | |
768 MLN(hi, lo); | |
769 | |
770 ptr = *Dptr + pe; | |
771 MLA(hi, lo, (*fe)[7], ptr[ 2]); | |
772 MLA(hi, lo, (*fe)[6], ptr[ 4]); | |
773 MLA(hi, lo, (*fe)[5], ptr[ 6]); | |
774 MLA(hi, lo, (*fe)[4], ptr[ 8]); | |
775 MLA(hi, lo, (*fe)[3], ptr[10]); | |
776 MLA(hi, lo, (*fe)[2], ptr[12]); | |
777 MLA(hi, lo, (*fe)[1], ptr[14]); | |
778 MLA(hi, lo, (*fe)[0], ptr[ 0]); | |
779 | |
780 *pcm1++ = SHIFT(MLZ(hi, lo)); | |
781 | |
/* mirrored sample: the same D row is read from its far end (31 - i); here
 * the fo terms are accumulated before the fe terms */
782 ptr = *Dptr - po; | |
783 ML0(hi, lo, (*fo)[7], ptr[31 - 2]); | |
784 MLA(hi, lo, (*fo)[6], ptr[31 - 4]); | |
785 MLA(hi, lo, (*fo)[5], ptr[31 - 6]); | |
786 MLA(hi, lo, (*fo)[4], ptr[31 - 8]); | |
787 MLA(hi, lo, (*fo)[3], ptr[31 - 10]); | |
788 MLA(hi, lo, (*fo)[2], ptr[31 - 12]); | |
789 MLA(hi, lo, (*fo)[1], ptr[31 - 14]); | |
790 MLA(hi, lo, (*fo)[0], ptr[31 - 16]); | |
791 | |
792 ptr = *Dptr - pe; | |
793 MLA(hi, lo, (*fe)[0], ptr[31 - 16]); | |
794 MLA(hi, lo, (*fe)[1], ptr[31 - 14]); | |
795 MLA(hi, lo, (*fe)[2], ptr[31 - 12]); | |
796 MLA(hi, lo, (*fe)[3], ptr[31 - 10]); | |
797 MLA(hi, lo, (*fe)[4], ptr[31 - 8]); | |
798 MLA(hi, lo, (*fe)[5], ptr[31 - 6]); | |
799 MLA(hi, lo, (*fe)[6], ptr[31 - 4]); | |
800 MLA(hi, lo, (*fe)[7], ptr[31 - 2]); | |
801 | |
802 *pcm2-- = SHIFT(MLZ(hi, lo)); | |
803 } | |
804 | |
805 ++fo; | |
806 } | |
807 | |
/* Dptr moves on to the last row, D[16], for the middle sample (index 8) */
808 ++Dptr; | |
809 | |
810 ptr = *Dptr + po; | |
811 ML0(hi, lo, (*fo)[0], ptr[ 0]); | |
812 MLA(hi, lo, (*fo)[1], ptr[14]); | |
813 MLA(hi, lo, (*fo)[2], ptr[12]); | |
814 MLA(hi, lo, (*fo)[3], ptr[10]); | |
815 MLA(hi, lo, (*fo)[4], ptr[ 8]); | |
816 MLA(hi, lo, (*fo)[5], ptr[ 6]); | |
817 MLA(hi, lo, (*fo)[6], ptr[ 4]); | |
818 MLA(hi, lo, (*fo)[7], ptr[ 2]); | |
819 | |
/* store sample 8, then step over samples 9..15 (already written through
 * pcm2) to the start of the next 16-sample block */
820 *pcm1 = SHIFT(-MLZ(hi, lo)); | |
821 pcm1 += 8; | |
822 | |
823 phase = (phase + 1) % 16; | |
824 } | |
825 } | |
826 } | |
827 | |
828 /* | |
829 * NAME: synth->frame() | |
830 * DESCRIPTION: perform PCM synthesis of frame subband samples | |
831 */ | |
832 void mad_synth_frame(struct mad_synth *synth, struct mad_frame const *frame) | |
833 { | |
834 unsigned int nch, ns; | |
835 void (*synth_frame)(struct mad_synth *, struct mad_frame const *, | |
836 unsigned int, unsigned int); | |
837 | |
838 nch = MAD_NCHANNELS(&frame->header); | |
839 ns = MAD_NSBSAMPLES(&frame->header); | |
840 | |
841 synth->pcm.samplerate = frame->header.samplerate; | |
842 synth->pcm.channels = nch; | |
843 synth->pcm.length = 32 * ns; | |
844 | |
845 synth_frame = synth_full; | |
846 | |
847 if (frame->options & MAD_OPTION_HALFSAMPLERATE) { | |
848 synth->pcm.samplerate /= 2; | |
849 synth->pcm.length /= 2; | |
850 | |
851 synth_frame = synth_half; | |
852 } | |
853 | |
854 synth_frame(synth, frame, nch, ns); | |
855 | |
856 synth->phase = (synth->phase + ns) % 16; | |
857 } |