comparison src/fftw-3.3.5/reodft/redft00e-r2hc.c @ 42:2cd0e3b3e1fd

Current fftw source
author Chris Cannam
date Tue, 18 Oct 2016 13:40:26 +0100
parents
children
comparison
equal deleted inserted replaced
41:481f5f8c5634 42:2cd0e3b3e1fd
1 /*
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21
22 /* Do a REDFT00 problem via an R2HC problem, with some pre/post-processing.
23
24 This code uses the trick from FFTPACK, also documented in a similar
25 form by Numerical Recipes. Unfortunately, this algorithm seems to
26 have intrinsic numerical problems (similar to those in
27 reodft11e-r2hc.c), possibly due to the fact that it multiplies its
28 input by a cosine, causing a loss of precision near the zero. For
29 transforms of 16k points, it has already lost three or four decimal
30 places of accuracy, which we deem unacceptable.
31
32 So, we have abandoned this algorithm in favor of the one in
33 redft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
34 The only other alternative in the literature that does not have
35 similar numerical difficulties seems to be the direct adaptation of
36 the Cooley-Tukey decomposition for symmetric data, but this would
37 require a whole new set of codelets and it's not clear that it's
38 worth it at this point. However, we did implement the latter
39 algorithm for the specific case of odd n (logically adapting the
40 split-radix algorithm); see reodft00e-splitradix.c. */
41
42 #include "reodft.h"
43
44 typedef struct {
45 solver super;
46 } S;
47
48 typedef struct {
49 plan_rdft super;
50 plan *cld;
51 twid *td;
52 INT is, os;
53 INT n;
54 INT vl;
55 INT ivs, ovs;
56 } P;
57
58 static void apply(const plan *ego_, R *I, R *O)
59 {
60 const P *ego = (const P *) ego_;
61 INT is = ego->is, os = ego->os;
62 INT i, n = ego->n;
63 INT iv, vl = ego->vl;
64 INT ivs = ego->ivs, ovs = ego->ovs;
65 R *W = ego->td->W;
66 R *buf;
67 E csum;
68
69 buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
70
71 for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
72 buf[0] = I[0] + I[is * n];
73 csum = I[0] - I[is * n];
74 for (i = 1; i < n - i; ++i) {
75 E a, b, apb, amb;
76 a = I[is * i];
77 b = I[is * (n - i)];
78 csum += W[2*i] * (amb = K(2.0)*(a - b));
79 amb = W[2*i+1] * amb;
80 apb = (a + b);
81 buf[i] = apb - amb;
82 buf[n - i] = apb + amb;
83 }
84 if (i == n - i) {
85 buf[i] = K(2.0) * I[is * i];
86 }
87
88 {
89 plan_rdft *cld = (plan_rdft *) ego->cld;
90 cld->apply((plan *) cld, buf, buf);
91 }
92
93 /* FIXME: use recursive/cascade summation for better stability? */
94 O[0] = buf[0];
95 O[os] = csum;
96 for (i = 1; i + i < n; ++i) {
97 INT k = i + i;
98 O[os * k] = buf[i];
99 O[os * (k + 1)] = O[os * (k - 1)] - buf[n - i];
100 }
101 if (i + i == n) {
102 O[os * n] = buf[i];
103 }
104 }
105
106 X(ifree)(buf);
107 }
108
109 static void awake(plan *ego_, enum wakefulness wakefulness)
110 {
111 P *ego = (P *) ego_;
112 static const tw_instr redft00e_tw[] = {
113 { TW_COS, 0, 1 },
114 { TW_SIN, 0, 1 },
115 { TW_NEXT, 1, 0 }
116 };
117
118 X(plan_awake)(ego->cld, wakefulness);
119 X(twiddle_awake)(wakefulness,
120 &ego->td, redft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
121 }
122
123 static void destroy(plan *ego_)
124 {
125 P *ego = (P *) ego_;
126 X(plan_destroy_internal)(ego->cld);
127 }
128
129 static void print(const plan *ego_, printer *p)
130 {
131 const P *ego = (const P *) ego_;
132 p->print(p, "(redft00e-r2hc-%D%v%(%p%))", ego->n + 1, ego->vl, ego->cld);
133 }
134
135 static int applicable0(const solver *ego_, const problem *p_)
136 {
137 const problem_rdft *p = (const problem_rdft *) p_;
138 UNUSED(ego_);
139
140 return (1
141 && p->sz->rnk == 1
142 && p->vecsz->rnk <= 1
143 && p->kind[0] == REDFT00
144 && p->sz->dims[0].n > 1 /* n == 1 is not well-defined */
145 );
146 }
147
148 static int applicable(const solver *ego, const problem *p, const planner *plnr)
149 {
150 return (!NO_SLOWP(plnr) && applicable0(ego, p));
151 }
152
153 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
154 {
155 P *pln;
156 const problem_rdft *p;
157 plan *cld;
158 R *buf;
159 INT n;
160 opcnt ops;
161
162 static const plan_adt padt = {
163 X(rdft_solve), awake, print, destroy
164 };
165
166 if (!applicable(ego_, p_, plnr))
167 return (plan *)0;
168
169 p = (const problem_rdft *) p_;
170
171 n = p->sz->dims[0].n - 1;
172 A(n > 0);
173 buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
174
175 cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
176 X(mktensor_0d)(),
177 buf, buf, R2HC));
178 X(ifree)(buf);
179 if (!cld)
180 return (plan *)0;
181
182 pln = MKPLAN_RDFT(P, &padt, apply);
183
184 pln->n = n;
185 pln->is = p->sz->dims[0].is;
186 pln->os = p->sz->dims[0].os;
187 pln->cld = cld;
188 pln->td = 0;
189
190 X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
191
192 X(ops_zero)(&ops);
193 ops.other = 8 + (n-1)/2 * 11 + (1 - n % 2) * 5;
194 ops.add = 2 + (n-1)/2 * 5;
195 ops.mul = (n-1)/2 * 3 + (1 - n % 2) * 1;
196
197 X(ops_zero)(&pln->super.super.ops);
198 X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
199 X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
200
201 return &(pln->super.super);
202 }
203
204 /* constructor */
205 static solver *mksolver(void)
206 {
207 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
208 S *slv = MKSOLVER(S, &sadt);
209 return &(slv->super);
210 }
211
212 void X(redft00e_r2hc_register)(planner *p)
213 {
214 REGISTER_SOLVER(p, mksolver());
215 }