annotate src/fftw-3.3.3/dft/rader.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 #include "dft.h"
Chris@10 22
Chris@10 23 /*
Chris@10 24 * Compute transforms of prime sizes using Rader's trick: turn them
Chris@10 25 * into convolutions of size n - 1, which you then perform via a pair
Chris@10 26 * of FFTs.
Chris@10 27 */
Chris@10 28
Chris@10 29 typedef struct {
Chris@10 30 solver super;
Chris@10 31 } S;
Chris@10 32
Chris@10 33 typedef struct {
Chris@10 34 plan_dft super;
Chris@10 35
Chris@10 36 plan *cld1, *cld2;
Chris@10 37 R *omega;
Chris@10 38 INT n, g, ginv;
Chris@10 39 INT is, os;
Chris@10 40 plan *cld_omega;
Chris@10 41 } P;
Chris@10 42
Chris@10 43 static rader_tl *omegas = 0;
Chris@10 44
Chris@10 45 static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
Chris@10 46 {
Chris@10 47 plan_dft *p = (plan_dft *) p_;
Chris@10 48 R *omega;
Chris@10 49 INT i, gpower;
Chris@10 50 trigreal scale;
Chris@10 51 triggen *t;
Chris@10 52
Chris@10 53 if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
Chris@10 54 return omega;
Chris@10 55
Chris@10 56 omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);
Chris@10 57
Chris@10 58 scale = n - 1.0; /* normalization for convolution */
Chris@10 59
Chris@10 60 t = X(mktriggen)(wakefulness, n);
Chris@10 61 for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
Chris@10 62 trigreal w[2];
Chris@10 63 t->cexpl(t, gpower, w);
Chris@10 64 omega[2*i] = w[0] / scale;
Chris@10 65 omega[2*i+1] = FFT_SIGN * w[1] / scale;
Chris@10 66 }
Chris@10 67 X(triggen_destroy)(t);
Chris@10 68 A(gpower == 1);
Chris@10 69
Chris@10 70 p->apply(p_, omega, omega + 1, omega, omega + 1);
Chris@10 71
Chris@10 72 X(rader_tl_insert)(n, n, ginv, omega, &omegas);
Chris@10 73 return omega;
Chris@10 74 }
Chris@10 75
Chris@10 76 static void free_omega(R *omega)
Chris@10 77 {
Chris@10 78 X(rader_tl_delete)(omega, &omegas);
Chris@10 79 }
Chris@10 80
Chris@10 81
Chris@10 82 /***************************************************************************/
Chris@10 83
Chris@10 84 /* Below, we extensively use the identity that fft(x*)* = ifft(x) in
Chris@10 85 order to share data between forward and backward transforms and to
Chris@10 86 obviate the necessity of having separate forward and backward
Chris@10 87 plans. (Although we often compute separate plans these days anyway
Chris@10 88 due to the differing strides, etcetera.)
Chris@10 89
Chris@10 90 Of course, since the new FFTW gives us separate pointers to
Chris@10 91 the real and imaginary parts, we could have instead used the
Chris@10 92 fft(r,i) = ifft(i,r) form of this identity, but it was easier to
Chris@10 93 reuse the code from our old version. */
Chris@10 94
Chris@10 95 static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
Chris@10 96 {
Chris@10 97 const P *ego = (const P *) ego_;
Chris@10 98 INT is, os;
Chris@10 99 INT k, gpower, g, r;
Chris@10 100 R *buf;
Chris@10 101 R r0 = ri[0], i0 = ii[0];
Chris@10 102
Chris@10 103 r = ego->n; is = ego->is; os = ego->os; g = ego->g;
Chris@10 104 buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
Chris@10 105
Chris@10 106 /* First, permute the input, storing in buf: */
Chris@10 107 for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
Chris@10 108 R rA, iA;
Chris@10 109 rA = ri[gpower * is];
Chris@10 110 iA = ii[gpower * is];
Chris@10 111 buf[2*k] = rA; buf[2*k + 1] = iA;
Chris@10 112 }
Chris@10 113 /* gpower == g^(r-1) mod r == 1 */;
Chris@10 114
Chris@10 115
Chris@10 116 /* compute DFT of buf, storing in output (except DC): */
Chris@10 117 {
Chris@10 118 plan_dft *cld = (plan_dft *) ego->cld1;
Chris@10 119 cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
Chris@10 120 }
Chris@10 121
Chris@10 122 /* set output DC component: */
Chris@10 123 {
Chris@10 124 ro[0] = r0 + ro[os];
Chris@10 125 io[0] = i0 + io[os];
Chris@10 126 }
Chris@10 127
Chris@10 128 /* now, multiply by omega: */
Chris@10 129 {
Chris@10 130 const R *omega = ego->omega;
Chris@10 131 for (k = 0; k < r - 1; ++k) {
Chris@10 132 E rB, iB, rW, iW;
Chris@10 133 rW = omega[2*k];
Chris@10 134 iW = omega[2*k+1];
Chris@10 135 rB = ro[(k+1)*os];
Chris@10 136 iB = io[(k+1)*os];
Chris@10 137 ro[(k+1)*os] = rW * rB - iW * iB;
Chris@10 138 io[(k+1)*os] = -(rW * iB + iW * rB);
Chris@10 139 }
Chris@10 140 }
Chris@10 141
Chris@10 142 /* this will add input[0] to all of the outputs after the ifft */
Chris@10 143 ro[os] += r0;
Chris@10 144 io[os] -= i0;
Chris@10 145
Chris@10 146 /* inverse FFT: */
Chris@10 147 {
Chris@10 148 plan_dft *cld = (plan_dft *) ego->cld2;
Chris@10 149 cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
Chris@10 150 }
Chris@10 151
Chris@10 152 /* finally, do inverse permutation to unshuffle the output: */
Chris@10 153 {
Chris@10 154 INT ginv = ego->ginv;
Chris@10 155 gpower = 1;
Chris@10 156 for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
Chris@10 157 ro[gpower * os] = buf[2*k];
Chris@10 158 io[gpower * os] = -buf[2*k+1];
Chris@10 159 }
Chris@10 160 A(gpower == 1);
Chris@10 161 }
Chris@10 162
Chris@10 163
Chris@10 164 X(ifree)(buf);
Chris@10 165 }
Chris@10 166
Chris@10 167 /***************************************************************************/
Chris@10 168
Chris@10 169 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@10 170 {
Chris@10 171 P *ego = (P *) ego_;
Chris@10 172
Chris@10 173 X(plan_awake)(ego->cld1, wakefulness);
Chris@10 174 X(plan_awake)(ego->cld2, wakefulness);
Chris@10 175 X(plan_awake)(ego->cld_omega, wakefulness);
Chris@10 176
Chris@10 177 switch (wakefulness) {
Chris@10 178 case SLEEPY:
Chris@10 179 free_omega(ego->omega);
Chris@10 180 ego->omega = 0;
Chris@10 181 break;
Chris@10 182 default:
Chris@10 183 ego->g = X(find_generator)(ego->n);
Chris@10 184 ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
Chris@10 185 A(MULMOD(ego->g, ego->ginv, ego->n) == 1);
Chris@10 186
Chris@10 187 ego->omega = mkomega(wakefulness,
Chris@10 188 ego->cld_omega, ego->n, ego->ginv);
Chris@10 189 break;
Chris@10 190 }
Chris@10 191 }
Chris@10 192
Chris@10 193 static void destroy(plan *ego_)
Chris@10 194 {
Chris@10 195 P *ego = (P *) ego_;
Chris@10 196 X(plan_destroy_internal)(ego->cld_omega);
Chris@10 197 X(plan_destroy_internal)(ego->cld2);
Chris@10 198 X(plan_destroy_internal)(ego->cld1);
Chris@10 199 }
Chris@10 200
Chris@10 201 static void print(const plan *ego_, printer *p)
Chris@10 202 {
Chris@10 203 const P *ego = (const P *)ego_;
Chris@10 204 p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
Chris@10 205 ego->n, ego->is, ego->os, ego->cld1);
Chris@10 206 if (ego->cld2 != ego->cld1)
Chris@10 207 p->print(p, "%(%p%)", ego->cld2);
Chris@10 208 if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
Chris@10 209 p->print(p, "%(%p%)", ego->cld_omega);
Chris@10 210 p->putchr(p, ')');
Chris@10 211 }
Chris@10 212
Chris@10 213 static int applicable(const solver *ego_, const problem *p_,
Chris@10 214 const planner *plnr)
Chris@10 215 {
Chris@10 216 const problem_dft *p = (const problem_dft *) p_;
Chris@10 217 UNUSED(ego_);
Chris@10 218 return (1
Chris@10 219 && p->sz->rnk == 1
Chris@10 220 && p->vecsz->rnk == 0
Chris@10 221 && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
Chris@10 222 && X(is_prime)(p->sz->dims[0].n)
Chris@10 223
Chris@10 224 /* proclaim the solver SLOW if p-1 is not easily factorizable.
Chris@10 225 Bluestein should take care of this case. */
Chris@10 226 && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
Chris@10 227 );
Chris@10 228 }
Chris@10 229
Chris@10 230 static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
Chris@10 231 planner *plnr)
Chris@10 232 {
Chris@10 233 plan *cld1 = (plan *) 0;
Chris@10 234 plan *cld2 = (plan *) 0;
Chris@10 235 plan *cld_omega = (plan *) 0;
Chris@10 236 R *buf = (R *) 0;
Chris@10 237
Chris@10 238 /* initial allocation for the purpose of planning */
Chris@10 239 buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
Chris@10 240
Chris@10 241 cld1 = X(mkplan_f_d)(plnr,
Chris@10 242 X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
Chris@10 243 X(mktensor_1d)(1, 0, 0),
Chris@10 244 buf, buf + 1, ro + os, io + os),
Chris@10 245 NO_SLOW, 0, 0);
Chris@10 246 if (!cld1) goto nada;
Chris@10 247
Chris@10 248 cld2 = X(mkplan_f_d)(plnr,
Chris@10 249 X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
Chris@10 250 X(mktensor_1d)(1, 0, 0),
Chris@10 251 ro + os, io + os, buf, buf + 1),
Chris@10 252 NO_SLOW, 0, 0);
Chris@10 253
Chris@10 254 if (!cld2) goto nada;
Chris@10 255
Chris@10 256 /* plan for omega array */
Chris@10 257 cld_omega = X(mkplan_f_d)(plnr,
Chris@10 258 X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
Chris@10 259 X(mktensor_1d)(1, 0, 0),
Chris@10 260 buf, buf + 1, buf, buf + 1),
Chris@10 261 NO_SLOW, ESTIMATE, 0);
Chris@10 262 if (!cld_omega) goto nada;
Chris@10 263
Chris@10 264 /* deallocate buffers; let awake() or apply() allocate them for real */
Chris@10 265 X(ifree)(buf);
Chris@10 266 buf = 0;
Chris@10 267
Chris@10 268 pln->cld1 = cld1;
Chris@10 269 pln->cld2 = cld2;
Chris@10 270 pln->cld_omega = cld_omega;
Chris@10 271 pln->omega = 0;
Chris@10 272 pln->n = n;
Chris@10 273 pln->is = is;
Chris@10 274 pln->os = os;
Chris@10 275
Chris@10 276 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@10 277 pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
Chris@10 278 pln->super.super.ops.add += (n - 1) * 2 + 4;
Chris@10 279 pln->super.super.ops.mul += (n - 1) * 4;
Chris@10 280
Chris@10 281 return 1;
Chris@10 282
Chris@10 283 nada:
Chris@10 284 X(ifree0)(buf);
Chris@10 285 X(plan_destroy_internal)(cld_omega);
Chris@10 286 X(plan_destroy_internal)(cld2);
Chris@10 287 X(plan_destroy_internal)(cld1);
Chris@10 288 return 0;
Chris@10 289 }
Chris@10 290
Chris@10 291 static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
Chris@10 292 {
Chris@10 293 const problem_dft *p = (const problem_dft *) p_;
Chris@10 294 P *pln;
Chris@10 295 INT n;
Chris@10 296 INT is, os;
Chris@10 297
Chris@10 298 static const plan_adt padt = {
Chris@10 299 X(dft_solve), awake, print, destroy
Chris@10 300 };
Chris@10 301
Chris@10 302 if (!applicable(ego, p_, plnr))
Chris@10 303 return (plan *) 0;
Chris@10 304
Chris@10 305 n = p->sz->dims[0].n;
Chris@10 306 is = p->sz->dims[0].is;
Chris@10 307 os = p->sz->dims[0].os;
Chris@10 308
Chris@10 309 pln = MKPLAN_DFT(P, &padt, apply);
Chris@10 310 if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
Chris@10 311 X(ifree)(pln);
Chris@10 312 return (plan *) 0;
Chris@10 313 }
Chris@10 314 return &(pln->super.super);
Chris@10 315 }
Chris@10 316
Chris@10 317 static solver *mksolver(void)
Chris@10 318 {
Chris@10 319 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
Chris@10 320 S *slv = MKSOLVER(S, &sadt);
Chris@10 321 return &(slv->super);
Chris@10 322 }
Chris@10 323
Chris@10 324 void X(dft_rader_register)(planner *p)
Chris@10 325 {
Chris@10 326 REGISTER_SOLVER(p, mksolver());
Chris@10 327 }