annotate src/fftw-3.3.8/reodft/redft00e-r2hc.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21
Chris@82 22 /* Do a REDFT00 problem via an R2HC problem, with some pre/post-processing.
Chris@82 23
Chris@82 24 This code uses the trick from FFTPACK, also documented in a similar
Chris@82 25 form by Numerical Recipes. Unfortunately, this algorithm seems to
Chris@82 26 have intrinsic numerical problems (similar to those in
Chris@82 27 reodft11e-r2hc.c), possibly due to the fact that it multiplies its
Chris@82 28 input by a cosine, causing a loss of precision near the zero. For
Chris@82 29 transforms of 16k points, it has already lost three or four decimal
Chris@82 30 places of accuracy, which we deem unacceptable.
Chris@82 31
Chris@82 32 So, we have abandoned this algorithm in favor of the one in
Chris@82 33 redft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
Chris@82 34 The only other alternative in the literature that does not have
Chris@82 35 similar numerical difficulties seems to be the direct adaptation of
Chris@82 36 the Cooley-Tukey decomposition for symmetric data, but this would
Chris@82 37 require a whole new set of codelets and it's not clear that it's
Chris@82 38 worth it at this point. However, we did implement the latter
Chris@82 39 algorithm for the specific case of odd n (logically adapting the
Chris@82 40 split-radix algorithm); see reodft00e-splitradix.c. */
Chris@82 41
Chris@82 42 #include "reodft/reodft.h"
Chris@82 43
Chris@82 44 typedef struct {
Chris@82 45 solver super;
Chris@82 46 } S;
Chris@82 47
Chris@82 48 typedef struct {
Chris@82 49 plan_rdft super;
Chris@82 50 plan *cld;
Chris@82 51 twid *td;
Chris@82 52 INT is, os;
Chris@82 53 INT n;
Chris@82 54 INT vl;
Chris@82 55 INT ivs, ovs;
Chris@82 56 } P;
Chris@82 57
Chris@82 58 static void apply(const plan *ego_, R *I, R *O)
Chris@82 59 {
Chris@82 60 const P *ego = (const P *) ego_;
Chris@82 61 INT is = ego->is, os = ego->os;
Chris@82 62 INT i, n = ego->n;
Chris@82 63 INT iv, vl = ego->vl;
Chris@82 64 INT ivs = ego->ivs, ovs = ego->ovs;
Chris@82 65 R *W = ego->td->W;
Chris@82 66 R *buf;
Chris@82 67 E csum;
Chris@82 68
Chris@82 69 buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
Chris@82 70
Chris@82 71 for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
Chris@82 72 buf[0] = I[0] + I[is * n];
Chris@82 73 csum = I[0] - I[is * n];
Chris@82 74 for (i = 1; i < n - i; ++i) {
Chris@82 75 E a, b, apb, amb;
Chris@82 76 a = I[is * i];
Chris@82 77 b = I[is * (n - i)];
Chris@82 78 csum += W[2*i] * (amb = K(2.0)*(a - b));
Chris@82 79 amb = W[2*i+1] * amb;
Chris@82 80 apb = (a + b);
Chris@82 81 buf[i] = apb - amb;
Chris@82 82 buf[n - i] = apb + amb;
Chris@82 83 }
Chris@82 84 if (i == n - i) {
Chris@82 85 buf[i] = K(2.0) * I[is * i];
Chris@82 86 }
Chris@82 87
Chris@82 88 {
Chris@82 89 plan_rdft *cld = (plan_rdft *) ego->cld;
Chris@82 90 cld->apply((plan *) cld, buf, buf);
Chris@82 91 }
Chris@82 92
Chris@82 93 /* FIXME: use recursive/cascade summation for better stability? */
Chris@82 94 O[0] = buf[0];
Chris@82 95 O[os] = csum;
Chris@82 96 for (i = 1; i + i < n; ++i) {
Chris@82 97 INT k = i + i;
Chris@82 98 O[os * k] = buf[i];
Chris@82 99 O[os * (k + 1)] = O[os * (k - 1)] - buf[n - i];
Chris@82 100 }
Chris@82 101 if (i + i == n) {
Chris@82 102 O[os * n] = buf[i];
Chris@82 103 }
Chris@82 104 }
Chris@82 105
Chris@82 106 X(ifree)(buf);
Chris@82 107 }
Chris@82 108
Chris@82 109 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@82 110 {
Chris@82 111 P *ego = (P *) ego_;
Chris@82 112 static const tw_instr redft00e_tw[] = {
Chris@82 113 { TW_COS, 0, 1 },
Chris@82 114 { TW_SIN, 0, 1 },
Chris@82 115 { TW_NEXT, 1, 0 }
Chris@82 116 };
Chris@82 117
Chris@82 118 X(plan_awake)(ego->cld, wakefulness);
Chris@82 119 X(twiddle_awake)(wakefulness,
Chris@82 120 &ego->td, redft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
Chris@82 121 }
Chris@82 122
Chris@82 123 static void destroy(plan *ego_)
Chris@82 124 {
Chris@82 125 P *ego = (P *) ego_;
Chris@82 126 X(plan_destroy_internal)(ego->cld);
Chris@82 127 }
Chris@82 128
Chris@82 129 static void print(const plan *ego_, printer *p)
Chris@82 130 {
Chris@82 131 const P *ego = (const P *) ego_;
Chris@82 132 p->print(p, "(redft00e-r2hc-%D%v%(%p%))", ego->n + 1, ego->vl, ego->cld);
Chris@82 133 }
Chris@82 134
Chris@82 135 static int applicable0(const solver *ego_, const problem *p_)
Chris@82 136 {
Chris@82 137 const problem_rdft *p = (const problem_rdft *) p_;
Chris@82 138 UNUSED(ego_);
Chris@82 139
Chris@82 140 return (1
Chris@82 141 && p->sz->rnk == 1
Chris@82 142 && p->vecsz->rnk <= 1
Chris@82 143 && p->kind[0] == REDFT00
Chris@82 144 && p->sz->dims[0].n > 1 /* n == 1 is not well-defined */
Chris@82 145 );
Chris@82 146 }
Chris@82 147
Chris@82 148 static int applicable(const solver *ego, const problem *p, const planner *plnr)
Chris@82 149 {
Chris@82 150 return (!NO_SLOWP(plnr) && applicable0(ego, p));
Chris@82 151 }
Chris@82 152
Chris@82 153 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@82 154 {
Chris@82 155 P *pln;
Chris@82 156 const problem_rdft *p;
Chris@82 157 plan *cld;
Chris@82 158 R *buf;
Chris@82 159 INT n;
Chris@82 160 opcnt ops;
Chris@82 161
Chris@82 162 static const plan_adt padt = {
Chris@82 163 X(rdft_solve), awake, print, destroy
Chris@82 164 };
Chris@82 165
Chris@82 166 if (!applicable(ego_, p_, plnr))
Chris@82 167 return (plan *)0;
Chris@82 168
Chris@82 169 p = (const problem_rdft *) p_;
Chris@82 170
Chris@82 171 n = p->sz->dims[0].n - 1;
Chris@82 172 A(n > 0);
Chris@82 173 buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
Chris@82 174
Chris@82 175 cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
Chris@82 176 X(mktensor_0d)(),
Chris@82 177 buf, buf, R2HC));
Chris@82 178 X(ifree)(buf);
Chris@82 179 if (!cld)
Chris@82 180 return (plan *)0;
Chris@82 181
Chris@82 182 pln = MKPLAN_RDFT(P, &padt, apply);
Chris@82 183
Chris@82 184 pln->n = n;
Chris@82 185 pln->is = p->sz->dims[0].is;
Chris@82 186 pln->os = p->sz->dims[0].os;
Chris@82 187 pln->cld = cld;
Chris@82 188 pln->td = 0;
Chris@82 189
Chris@82 190 X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
Chris@82 191
Chris@82 192 X(ops_zero)(&ops);
Chris@82 193 ops.other = 8 + (n-1)/2 * 11 + (1 - n % 2) * 5;
Chris@82 194 ops.add = 2 + (n-1)/2 * 5;
Chris@82 195 ops.mul = (n-1)/2 * 3 + (1 - n % 2) * 1;
Chris@82 196
Chris@82 197 X(ops_zero)(&pln->super.super.ops);
Chris@82 198 X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
Chris@82 199 X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
Chris@82 200
Chris@82 201 return &(pln->super.super);
Chris@82 202 }
Chris@82 203
Chris@82 204 /* constructor */
Chris@82 205 static solver *mksolver(void)
Chris@82 206 {
Chris@82 207 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
Chris@82 208 S *slv = MKSOLVER(S, &sadt);
Chris@82 209 return &(slv->super);
Chris@82 210 }
Chris@82 211
Chris@82 212 void X(redft00e_r2hc_register)(planner *p)
Chris@82 213 {
Chris@82 214 REGISTER_SOLVER(p, mksolver());
Chris@82 215 }