js-dsp-test: fft/fftw/fftw-3.3.4/reodft/reodft010e-r2hc.c annotate

annotate fft/fftw/fftw-3.3.4/reodft/reodft010e-r2hc.c @ 40:223f770b5341 kissfft-double tip

Try a double-precision kissfft

author	Chris Cannam
date	Wed, 07 Sep 2016 10:40:32 +0100
parents	26056e866c29
children

rev	line source
Chris@19	1 /*
Chris@19	2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@19	3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@19	4 *
Chris@19	5 * This program is free software; you can redistribute it and/or modify
Chris@19	6 * it under the terms of the GNU General Public License as published by
Chris@19	7 * the Free Software Foundation; either version 2 of the License, or
Chris@19	8 * (at your option) any later version.
Chris@19	9 *
Chris@19	10 * This program is distributed in the hope that it will be useful,
Chris@19	11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@19	12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@19	13 * GNU General Public License for more details.
Chris@19	14 *
Chris@19	15 * You should have received a copy of the GNU General Public License
Chris@19	16 * along with this program; if not, write to the Free Software
Chris@19	17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@19	18 *
Chris@19	19 */
Chris@19	20
Chris@19	21
Chris@19	22 /* Do an R{E,O}DFT{01,10} problem via an R2HC problem, with some
Chris@19	23 pre/post-processing ala FFTPACK. */
Chris@19	24
Chris@19	25 #include "reodft.h"
Chris@19	26
Chris@19	27 typedef struct {
Chris@19	28 solver super;
Chris@19	29 } S;
Chris@19	30
Chris@19	31 typedef struct {
Chris@19	32 plan_rdft super;
Chris@19	33 plan *cld;
Chris@19	34 twid *td;
Chris@19	35 INT is, os;
Chris@19	36 INT n;
Chris@19	37 INT vl;
Chris@19	38 INT ivs, ovs;
Chris@19	39 rdft_kind kind;
Chris@19	40 } P;
Chris@19	41
Chris@19	42 /* A real-even-01 DFT operates logically on a size-4N array:
Chris@19	43 I 0 -r(I*) -I 0 r(I*),
Chris@19	44 where r denotes reversal and * denotes deletion of the 0th element.
Chris@19	45 To compute the transform of this, we imagine performing a radix-4
Chris@19	46 (real-input) DIF step, which turns the size-4N DFT into 4 size-N
Chris@19	47 (contiguous) DFTs, two of which are zero and two of which are
Chris@19	48 conjugates. The non-redundant size-N DFT has halfcomplex input, so
Chris@19	49 we can do it with a size-N hc2r transform. (In order to share
Chris@19	50 plans with the re10 (inverse) transform, however, we use the DHT
Chris@19	51 trick to re-express the hc2r problem as r2hc. This has little cost
Chris@19	52 since we are already pre- and post-processing the data in {i,n-i}
Chris@19	53 order.) Finally, we have to write out the data in the correct
Chris@19	54 order...the two size-N redundant (conjugate) hc2r DFTs correspond
Chris@19	55 to the even and odd outputs in O (i.e. the usual interleaved output
Chris@19	56 of DIF transforms); since this data has even symmetry, we only
Chris@19	57 write the first half of it.
Chris@19	58
Chris@19	59 The real-even-10 DFT is just the reverse of these steps, i.e. a
Chris@19	60 radix-4 DIT transform. There, however, we just use the r2hc
Chris@19	61 transform naturally without resorting to the DHT trick.
Chris@19	62
Chris@19	63 A real-odd-01 DFT is very similar, except that the input is
Chris@19	64 0 I (rI)* 0 -I -(rI)*. This format, however, can be transformed
Chris@19	65 into precisely the real-even-01 format above by sending I -> rI
Chris@19	66 and shifting the array by N. The former swap is just another
Chris@19	67 transformation on the input during preprocessing; the latter
Chris@19	68 multiplies the even/odd outputs by i/-i, which combines with
Chris@19	69 the factor of -i (to take the imaginary part) to simply flip
Chris@19	70 the sign of the odd outputs. Vice-versa for real-odd-10.
Chris@19	71
Chris@19	72 The FFTPACK source code was very helpful in working this out.
Chris@19	73 (They do unnecessary passes over the array, though.) The same
Chris@19	74 algorithm is also described in:
Chris@19	75
Chris@19	76 John Makhoul, "A fast cosine transform in one and two dimensions,"
Chris@19	77 IEEE Trans. on Acoust. Speech and Sig. Proc., ASSP-28 (1), 27--34 (1980).
Chris@19	78
Chris@19	79 Note that Numerical Recipes suggests a different algorithm that
Chris@19	80 requires more operations and uses trig. functions for both the pre-
Chris@19	81 and post-processing passes.
Chris@19	82 */
Chris@19	83
Chris@19	84 static void apply_re01(const plan ego_, R I, R *O)
Chris@19	85 {
Chris@19	86 const P ego = (const P ) ego_;
Chris@19	87 INT is = ego->is, os = ego->os;
Chris@19	88 INT i, n = ego->n;
Chris@19	89 INT iv, vl = ego->vl;
Chris@19	90 INT ivs = ego->ivs, ovs = ego->ovs;
Chris@19	91 R *W = ego->td->W;
Chris@19	92 R *buf;
Chris@19	93
Chris@19	94 buf = (R ) MALLOC(sizeof(R) n, BUFFERS);
Chris@19	95
Chris@19	96 for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
Chris@19	97 buf[0] = I[0];
Chris@19	98 for (i = 1; i < n - i; ++i) {
Chris@19	99 E a, b, apb, amb, wa, wb;
Chris@19	100 a = I[is * i];
Chris@19	101 b = I[is * (n - i)];
Chris@19	102 apb = a + b;
Chris@19	103 amb = a - b;
Chris@19	104 wa = W[2*i];
Chris@19	105 wb = W[2*i + 1];
Chris@19	106 buf[i] = wa * amb + wb * apb;
Chris@19	107 buf[n - i] = wa * apb - wb * amb;
Chris@19	108 }
Chris@19	109 if (i == n - i) {
Chris@19	110 buf[i] = K(2.0) * I[is * i] * W[2*i];
Chris@19	111 }
Chris@19	112
Chris@19	113 {
Chris@19	114 plan_rdft cld = (plan_rdft ) ego->cld;
Chris@19	115 cld->apply((plan *) cld, buf, buf);
Chris@19	116 }
Chris@19	117
Chris@19	118 O[0] = buf[0];
Chris@19	119 for (i = 1; i < n - i; ++i) {
Chris@19	120 E a, b;
Chris@19	121 INT k;
Chris@19	122 a = buf[i];
Chris@19	123 b = buf[n - i];
Chris@19	124 k = i + i;
Chris@19	125 O[os * (k - 1)] = a - b;
Chris@19	126 O[os * k] = a + b;
Chris@19	127 }
Chris@19	128 if (i == n - i) {
Chris@19	129 O[os * (n - 1)] = buf[i];
Chris@19	130 }
Chris@19	131 }
Chris@19	132
Chris@19	133 X(ifree)(buf);
Chris@19	134 }
Chris@19	135
Chris@19	136 /* ro01 is same as re01, but with i <-> n - 1 - i in the input and
Chris@19	137 the sign of the odd output elements flipped. */
Chris@19	138 static void apply_ro01(const plan ego_, R I, R *O)
Chris@19	139 {
Chris@19	140 const P ego = (const P ) ego_;
Chris@19	141 INT is = ego->is, os = ego->os;
Chris@19	142 INT i, n = ego->n;
Chris@19	143 INT iv, vl = ego->vl;
Chris@19	144 INT ivs = ego->ivs, ovs = ego->ovs;
Chris@19	145 R *W = ego->td->W;
Chris@19	146 R *buf;
Chris@19	147
Chris@19	148 buf = (R ) MALLOC(sizeof(R) n, BUFFERS);
Chris@19	149
Chris@19	150 for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
Chris@19	151 buf[0] = I[is * (n - 1)];
Chris@19	152 for (i = 1; i < n - i; ++i) {
Chris@19	153 E a, b, apb, amb, wa, wb;
Chris@19	154 a = I[is * (n - 1 - i)];
Chris@19	155 b = I[is * (i - 1)];
Chris@19	156 apb = a + b;
Chris@19	157 amb = a - b;
Chris@19	158 wa = W[2*i];
Chris@19	159 wb = W[2*i+1];
Chris@19	160 buf[i] = wa * amb + wb * apb;
Chris@19	161 buf[n - i] = wa * apb - wb * amb;
Chris@19	162 }
Chris@19	163 if (i == n - i) {
Chris@19	164 buf[i] = K(2.0) * I[is * (i - 1)] * W[2*i];
Chris@19	165 }
Chris@19	166
Chris@19	167 {
Chris@19	168 plan_rdft cld = (plan_rdft ) ego->cld;
Chris@19	169 cld->apply((plan *) cld, buf, buf);
Chris@19	170 }
Chris@19	171
Chris@19	172 O[0] = buf[0];
Chris@19	173 for (i = 1; i < n - i; ++i) {
Chris@19	174 E a, b;
Chris@19	175 INT k;
Chris@19	176 a = buf[i];
Chris@19	177 b = buf[n - i];
Chris@19	178 k = i + i;
Chris@19	179 O[os * (k - 1)] = b - a;
Chris@19	180 O[os * k] = a + b;
Chris@19	181 }
Chris@19	182 if (i == n - i) {
Chris@19	183 O[os * (n - 1)] = -buf[i];
Chris@19	184 }
Chris@19	185 }
Chris@19	186
Chris@19	187 X(ifree)(buf);
Chris@19	188 }
Chris@19	189
Chris@19	190 static void apply_re10(const plan ego_, R I, R *O)
Chris@19	191 {
Chris@19	192 const P ego = (const P ) ego_;
Chris@19	193 INT is = ego->is, os = ego->os;
Chris@19	194 INT i, n = ego->n;
Chris@19	195 INT iv, vl = ego->vl;
Chris@19	196 INT ivs = ego->ivs, ovs = ego->ovs;
Chris@19	197 R *W = ego->td->W;
Chris@19	198 R *buf;
Chris@19	199
Chris@19	200 buf = (R ) MALLOC(sizeof(R) n, BUFFERS);
Chris@19	201
Chris@19	202 for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
Chris@19	203 buf[0] = I[0];
Chris@19	204 for (i = 1; i < n - i; ++i) {
Chris@19	205 E u, v;
Chris@19	206 INT k = i + i;
Chris@19	207 u = I[is * (k - 1)];
Chris@19	208 v = I[is * k];
Chris@19	209 buf[n - i] = u;
Chris@19	210 buf[i] = v;
Chris@19	211 }
Chris@19	212 if (i == n - i) {
Chris@19	213 buf[i] = I[is * (n - 1)];
Chris@19	214 }
Chris@19	215
Chris@19	216 {
Chris@19	217 plan_rdft cld = (plan_rdft ) ego->cld;
Chris@19	218 cld->apply((plan *) cld, buf, buf);
Chris@19	219 }
Chris@19	220
Chris@19	221 O[0] = K(2.0) * buf[0];
Chris@19	222 for (i = 1; i < n - i; ++i) {
Chris@19	223 E a, b, wa, wb;
Chris@19	224 a = K(2.0) * buf[i];
Chris@19	225 b = K(2.0) * buf[n - i];
Chris@19	226 wa = W[2*i];
Chris@19	227 wb = W[2*i + 1];
Chris@19	228 O[os * i] = wa * a + wb * b;
Chris@19	229 O[os * (n - i)] = wb * a - wa * b;
Chris@19	230 }
Chris@19	231 if (i == n - i) {
Chris@19	232 O[os * i] = K(2.0) * buf[i] * W[2*i];
Chris@19	233 }
Chris@19	234 }
Chris@19	235
Chris@19	236 X(ifree)(buf);
Chris@19	237 }
Chris@19	238
Chris@19	239 /* ro10 is same as re10, but with i <-> n - 1 - i in the output and
Chris@19	240 the sign of the odd input elements flipped. */
Chris@19	241 static void apply_ro10(const plan ego_, R I, R *O)
Chris@19	242 {
Chris@19	243 const P ego = (const P ) ego_;
Chris@19	244 INT is = ego->is, os = ego->os;
Chris@19	245 INT i, n = ego->n;
Chris@19	246 INT iv, vl = ego->vl;
Chris@19	247 INT ivs = ego->ivs, ovs = ego->ovs;
Chris@19	248 R *W = ego->td->W;
Chris@19	249 R *buf;
Chris@19	250
Chris@19	251 buf = (R ) MALLOC(sizeof(R) n, BUFFERS);
Chris@19	252
Chris@19	253 for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
Chris@19	254 buf[0] = I[0];
Chris@19	255 for (i = 1; i < n - i; ++i) {
Chris@19	256 E u, v;
Chris@19	257 INT k = i + i;
Chris@19	258 u = -I[is * (k - 1)];
Chris@19	259 v = I[is * k];
Chris@19	260 buf[n - i] = u;
Chris@19	261 buf[i] = v;
Chris@19	262 }
Chris@19	263 if (i == n - i) {
Chris@19	264 buf[i] = -I[is * (n - 1)];
Chris@19	265 }
Chris@19	266
Chris@19	267 {
Chris@19	268 plan_rdft cld = (plan_rdft ) ego->cld;
Chris@19	269 cld->apply((plan *) cld, buf, buf);
Chris@19	270 }
Chris@19	271
Chris@19	272 O[os * (n - 1)] = K(2.0) * buf[0];
Chris@19	273 for (i = 1; i < n - i; ++i) {
Chris@19	274 E a, b, wa, wb;
Chris@19	275 a = K(2.0) * buf[i];
Chris@19	276 b = K(2.0) * buf[n - i];
Chris@19	277 wa = W[2*i];
Chris@19	278 wb = W[2*i + 1];
Chris@19	279 O[os * (n - 1 - i)] = wa * a + wb * b;
Chris@19	280 O[os * (i - 1)] = wb * a - wa * b;
Chris@19	281 }
Chris@19	282 if (i == n - i) {
Chris@19	283 O[os * (i - 1)] = K(2.0) * buf[i] * W[2*i];
Chris@19	284 }
Chris@19	285 }
Chris@19	286
Chris@19	287 X(ifree)(buf);
Chris@19	288 }
Chris@19	289
Chris@19	290 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@19	291 {
Chris@19	292 P ego = (P ) ego_;
Chris@19	293 static const tw_instr reodft010e_tw[] = {
Chris@19	294 { TW_COS, 0, 1 },
Chris@19	295 { TW_SIN, 0, 1 },
Chris@19	296 { TW_NEXT, 1, 0 }
Chris@19	297 };
Chris@19	298
Chris@19	299 X(plan_awake)(ego->cld, wakefulness);
Chris@19	300
Chris@19	301 X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw,
Chris@19	302 4*ego->n, 1, ego->n/2+1);
Chris@19	303 }
Chris@19	304
Chris@19	305 static void destroy(plan *ego_)
Chris@19	306 {
Chris@19	307 P ego = (P ) ego_;
Chris@19	308 X(plan_destroy_internal)(ego->cld);
Chris@19	309 }
Chris@19	310
Chris@19	311 static void print(const plan ego_, printer p)
Chris@19	312 {
Chris@19	313 const P ego = (const P ) ego_;
Chris@19	314 p->print(p, "(%se-r2hc-%D%v%(%p%))",
Chris@19	315 X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
Chris@19	316 }
Chris@19	317
Chris@19	318 static int applicable0(const solver ego_, const problem p_)
Chris@19	319 {
Chris@19	320 const problem_rdft p = (const problem_rdft ) p_;
Chris@19	321 UNUSED(ego_);
Chris@19	322
Chris@19	323 return (1
Chris@19	324 && p->sz->rnk == 1
Chris@19	325 && p->vecsz->rnk <= 1
Chris@19	326 && (p->kind[0] == REDFT01 \|\| p->kind[0] == REDFT10
Chris@19	327 \|\| p->kind[0] == RODFT01 \|\| p->kind[0] == RODFT10)
Chris@19	328 );
Chris@19	329 }
Chris@19	330
Chris@19	331 static int applicable(const solver ego, const problem p, const planner *plnr)
Chris@19	332 {
Chris@19	333 return (!NO_SLOWP(plnr) && applicable0(ego, p));
Chris@19	334 }
Chris@19	335
Chris@19	336 static plan mkplan(const solver ego_, const problem p_, planner plnr)
Chris@19	337 {
Chris@19	338 P *pln;
Chris@19	339 const problem_rdft *p;
Chris@19	340 plan *cld;
Chris@19	341 R *buf;
Chris@19	342 INT n;
Chris@19	343 opcnt ops;
Chris@19	344
Chris@19	345 static const plan_adt padt = {
Chris@19	346 X(rdft_solve), awake, print, destroy
Chris@19	347 };
Chris@19	348
Chris@19	349 if (!applicable(ego_, p_, plnr))
Chris@19	350 return (plan *)0;
Chris@19	351
Chris@19	352 p = (const problem_rdft *) p_;
Chris@19	353
Chris@19	354 n = p->sz->dims[0].n;
Chris@19	355 buf = (R ) MALLOC(sizeof(R) n, BUFFERS);
Chris@19	356
Chris@19	357 cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
Chris@19	358 X(mktensor_0d)(),
Chris@19	359 buf, buf, R2HC));
Chris@19	360 X(ifree)(buf);
Chris@19	361 if (!cld)
Chris@19	362 return (plan *)0;
Chris@19	363
Chris@19	364 switch (p->kind[0]) {
Chris@19	365 case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
Chris@19	366 case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
Chris@19	367 case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
Chris@19	368 case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
Chris@19	369 default: A(0); return (plan*)0;
Chris@19	370 }
Chris@19	371
Chris@19	372 pln->n = n;
Chris@19	373 pln->is = p->sz->dims[0].is;
Chris@19	374 pln->os = p->sz->dims[0].os;
Chris@19	375 pln->cld = cld;
Chris@19	376 pln->td = 0;
Chris@19	377 pln->kind = p->kind[0];
Chris@19	378
Chris@19	379 X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
Chris@19	380
Chris@19	381 X(ops_zero)(&ops);
Chris@19	382 ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5;
Chris@19	383 if (p->kind[0] == REDFT01 \|\| p->kind[0] == RODFT01) {
Chris@19	384 ops.add = (n-1)/2 * 6;
Chris@19	385 ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
Chris@19	386 }
Chris@19	387 else { /* 10 transforms */
Chris@19	388 ops.add = (n-1)/2 * 2;
Chris@19	389 ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
Chris@19	390 }
Chris@19	391
Chris@19	392 X(ops_zero)(&pln->super.super.ops);
Chris@19	393 X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
Chris@19	394 X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
Chris@19	395
Chris@19	396 return &(pln->super.super);
Chris@19	397 }
Chris@19	398
Chris@19	399 /* constructor */
Chris@19	400 static solver *mksolver(void)
Chris@19	401 {
Chris@19	402 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
Chris@19	403 S *slv = MKSOLVER(S, &sadt);
Chris@19	404 return &(slv->super);
Chris@19	405 }
Chris@19	406
Chris@19	407 void X(reodft010e_r2hc_register)(planner *p)
Chris@19	408 {
Chris@19	409 REGISTER_SOLVER(p, mksolver());
Chris@19	410 }

Mercurial > hg > js-dsp-test

annotate fft/fftw/fftw-3.3.4/reodft/reodft010e-r2hc.c @ 40:223f770b5341 kissfft-double tip