annotate src/fftw-3.3.8/rdft/rdft2-rdft.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21
Chris@82 22 #include "rdft/rdft.h"
Chris@82 23
Chris@82 24 typedef struct {
Chris@82 25 solver super;
Chris@82 26 } S;
Chris@82 27
Chris@82 28 typedef struct {
Chris@82 29 plan_rdft2 super;
Chris@82 30
Chris@82 31 plan *cld, *cldrest;
Chris@82 32 INT n, vl, nbuf, bufdist;
Chris@82 33 INT cs, ivs, ovs;
Chris@82 34 } P;
Chris@82 35
Chris@82 36 /***************************************************************************/
Chris@82 37
Chris@82 38 /* FIXME: have alternate copy functions that push a vector loop inside
Chris@82 39 the n loops? */
Chris@82 40
Chris@82 41 /* copy halfcomplex array r (contiguous) to complex (strided) array rio/iio. */
Chris@82 42 static void hc2c(INT n, R *r, R *rio, R *iio, INT os)
Chris@82 43 {
Chris@82 44 INT i;
Chris@82 45
Chris@82 46 rio[0] = r[0];
Chris@82 47 iio[0] = 0;
Chris@82 48
Chris@82 49 for (i = 1; i + i < n; ++i) {
Chris@82 50 rio[i * os] = r[i];
Chris@82 51 iio[i * os] = r[n - i];
Chris@82 52 }
Chris@82 53
Chris@82 54 if (i + i == n) { /* store the Nyquist frequency */
Chris@82 55 rio[i * os] = r[i];
Chris@82 56 iio[i * os] = K(0.0);
Chris@82 57 }
Chris@82 58 }
Chris@82 59
Chris@82 60 /* reverse of hc2c */
Chris@82 61 static void c2hc(INT n, R *rio, R *iio, INT is, R *r)
Chris@82 62 {
Chris@82 63 INT i;
Chris@82 64
Chris@82 65 r[0] = rio[0];
Chris@82 66
Chris@82 67 for (i = 1; i + i < n; ++i) {
Chris@82 68 r[i] = rio[i * is];
Chris@82 69 r[n - i] = iio[i * is];
Chris@82 70 }
Chris@82 71
Chris@82 72 if (i + i == n) /* store the Nyquist frequency */
Chris@82 73 r[i] = rio[i * is];
Chris@82 74 }
Chris@82 75
Chris@82 76 /***************************************************************************/
Chris@82 77
Chris@82 78 static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
Chris@82 79 {
Chris@82 80 const P *ego = (const P *) ego_;
Chris@82 81 plan_rdft *cld = (plan_rdft *) ego->cld;
Chris@82 82 INT i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist;
Chris@82 83 INT n = ego->n;
Chris@82 84 INT ivs = ego->ivs, ovs = ego->ovs, os = ego->cs;
Chris@82 85 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
Chris@82 86 plan_rdft2 *cldrest;
Chris@82 87
Chris@82 88 for (i = nbuf; i <= vl; i += nbuf) {
Chris@82 89 /* transform to bufs: */
Chris@82 90 cld->apply((plan *) cld, r0, bufs);
Chris@82 91 r0 += ivs * nbuf; r1 += ivs * nbuf;
Chris@82 92
Chris@82 93 /* copy back */
Chris@82 94 for (j = 0; j < nbuf; ++j, cr += ovs, ci += ovs)
Chris@82 95 hc2c(n, bufs + j*bufdist, cr, ci, os);
Chris@82 96 }
Chris@82 97
Chris@82 98 X(ifree)(bufs);
Chris@82 99
Chris@82 100 /* Do the remaining transforms, if any: */
Chris@82 101 cldrest = (plan_rdft2 *) ego->cldrest;
Chris@82 102 cldrest->apply((plan *) cldrest, r0, r1, cr, ci);
Chris@82 103 }
Chris@82 104
Chris@82 105 static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
Chris@82 106 {
Chris@82 107 const P *ego = (const P *) ego_;
Chris@82 108 plan_rdft *cld = (plan_rdft *) ego->cld;
Chris@82 109 INT i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist;
Chris@82 110 INT n = ego->n;
Chris@82 111 INT ivs = ego->ivs, ovs = ego->ovs, is = ego->cs;
Chris@82 112 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
Chris@82 113 plan_rdft2 *cldrest;
Chris@82 114
Chris@82 115 for (i = nbuf; i <= vl; i += nbuf) {
Chris@82 116 /* copy to bufs */
Chris@82 117 for (j = 0; j < nbuf; ++j, cr += ivs, ci += ivs)
Chris@82 118 c2hc(n, cr, ci, is, bufs + j*bufdist);
Chris@82 119
Chris@82 120 /* transform back: */
Chris@82 121 cld->apply((plan *) cld, bufs, r0);
Chris@82 122 r0 += ovs * nbuf; r1 += ovs * nbuf;
Chris@82 123 }
Chris@82 124
Chris@82 125 X(ifree)(bufs);
Chris@82 126
Chris@82 127 /* Do the remaining transforms, if any: */
Chris@82 128 cldrest = (plan_rdft2 *) ego->cldrest;
Chris@82 129 cldrest->apply((plan *) cldrest, r0, r1, cr, ci);
Chris@82 130 }
Chris@82 131
Chris@82 132 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@82 133 {
Chris@82 134 P *ego = (P *) ego_;
Chris@82 135
Chris@82 136 X(plan_awake)(ego->cld, wakefulness);
Chris@82 137 X(plan_awake)(ego->cldrest, wakefulness);
Chris@82 138 }
Chris@82 139
Chris@82 140 static void destroy(plan *ego_)
Chris@82 141 {
Chris@82 142 P *ego = (P *) ego_;
Chris@82 143 X(plan_destroy_internal)(ego->cldrest);
Chris@82 144 X(plan_destroy_internal)(ego->cld);
Chris@82 145 }
Chris@82 146
Chris@82 147 static void print(const plan *ego_, printer *p)
Chris@82 148 {
Chris@82 149 const P *ego = (const P *) ego_;
Chris@82 150 p->print(p, "(rdft2-rdft-%s-%D%v/%D-%D%(%p%)%(%p%))",
Chris@82 151 ego->super.apply == apply_r2hc ? "r2hc" : "hc2r",
Chris@82 152 ego->n, ego->nbuf,
Chris@82 153 ego->vl, ego->bufdist % ego->n,
Chris@82 154 ego->cld, ego->cldrest);
Chris@82 155 }
Chris@82 156
Chris@82 157 static INT min_nbuf(const problem_rdft2 *p, INT n, INT vl)
Chris@82 158 {
Chris@82 159 INT is, os, ivs, ovs;
Chris@82 160
Chris@82 161 if (p->r0 != p->cr)
Chris@82 162 return 1;
Chris@82 163 if (X(rdft2_inplace_strides(p, RNK_MINFTY)))
Chris@82 164 return 1;
Chris@82 165 A(p->vecsz->rnk == 1); /* rank 0 and MINFTY are inplace */
Chris@82 166
Chris@82 167 X(rdft2_strides)(p->kind, p->sz->dims, &is, &os);
Chris@82 168 X(rdft2_strides)(p->kind, p->vecsz->dims, &ivs, &ovs);
Chris@82 169
Chris@82 170 /* handle one potentially common case: "contiguous" real and
Chris@82 171 complex arrays, which overlap because of the differing sizes. */
Chris@82 172 if (n * X(iabs)(is) <= X(iabs)(ivs)
Chris@82 173 && (n/2 + 1) * X(iabs)(os) <= X(iabs)(ovs)
Chris@82 174 && ( ((p->cr - p->ci) <= X(iabs)(os)) ||
Chris@82 175 ((p->ci - p->cr) <= X(iabs)(os)) )
Chris@82 176 && ivs > 0 && ovs > 0) {
Chris@82 177 INT vsmin = X(imin)(ivs, ovs);
Chris@82 178 INT vsmax = X(imax)(ivs, ovs);
Chris@82 179 return(((vsmax - vsmin) * vl + vsmin - 1) / vsmin);
Chris@82 180 }
Chris@82 181
Chris@82 182 return vl; /* punt: just buffer the whole vector */
Chris@82 183 }
Chris@82 184
Chris@82 185 static int applicable0(const problem *p_, const S *ego, const planner *plnr)
Chris@82 186 {
Chris@82 187 const problem_rdft2 *p = (const problem_rdft2 *) p_;
Chris@82 188 UNUSED(ego);
Chris@82 189 return(1
Chris@82 190 && p->vecsz->rnk <= 1
Chris@82 191 && p->sz->rnk == 1
Chris@82 192
Chris@82 193 /* FIXME: does it make sense to do R2HCII ? */
Chris@82 194 && (p->kind == R2HC || p->kind == HC2R)
Chris@82 195
Chris@82 196 /* real strides must allow for reduction to rdft */
Chris@82 197 && (2 * (p->r1 - p->r0) ==
Chris@82 198 (((p->kind == R2HC) ? p->sz->dims[0].is : p->sz->dims[0].os)))
Chris@82 199
Chris@82 200 && !(X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
Chris@82 201 );
Chris@82 202 }
Chris@82 203
Chris@82 204 static int applicable(const problem *p_, const S *ego, const planner *plnr)
Chris@82 205 {
Chris@82 206 const problem_rdft2 *p;
Chris@82 207
Chris@82 208 if (NO_BUFFERINGP(plnr)) return 0;
Chris@82 209
Chris@82 210 if (!applicable0(p_, ego, plnr)) return 0;
Chris@82 211
Chris@82 212 p = (const problem_rdft2 *) p_;
Chris@82 213 if (NO_UGLYP(plnr)) {
Chris@82 214 if (p->r0 != p->cr) return 0;
Chris@82 215 if (X(toobig)(p->sz->dims[0].n)) return 0;
Chris@82 216 }
Chris@82 217 return 1;
Chris@82 218 }
Chris@82 219
Chris@82 220 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@82 221 {
Chris@82 222 const S *ego = (const S *) ego_;
Chris@82 223 P *pln;
Chris@82 224 plan *cld = (plan *) 0;
Chris@82 225 plan *cldrest = (plan *) 0;
Chris@82 226 const problem_rdft2 *p = (const problem_rdft2 *) p_;
Chris@82 227 R *bufs = (R *) 0;
Chris@82 228 INT nbuf = 0, bufdist, n, vl;
Chris@82 229 INT ivs, ovs, rs, id, od;
Chris@82 230
Chris@82 231 static const plan_adt padt = {
Chris@82 232 X(rdft2_solve), awake, print, destroy
Chris@82 233 };
Chris@82 234
Chris@82 235 if (!applicable(p_, ego, plnr))
Chris@82 236 goto nada;
Chris@82 237
Chris@82 238 n = p->sz->dims[0].n;
Chris@82 239 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@82 240
Chris@82 241 nbuf = X(imax)(X(nbuf)(n, vl, 0), min_nbuf(p, n, vl));
Chris@82 242 bufdist = X(bufdist)(n, vl);
Chris@82 243 A(nbuf > 0);
Chris@82 244
Chris@82 245 /* initial allocation for the purpose of planning */
Chris@82 246 bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
Chris@82 247
Chris@82 248 id = ivs * (nbuf * (vl / nbuf));
Chris@82 249 od = ovs * (nbuf * (vl / nbuf));
Chris@82 250
Chris@82 251 if (p->kind == R2HC) {
Chris@82 252 cld = X(mkplan_f_d)(
Chris@82 253 plnr,
Chris@82 254 X(mkproblem_rdft_d)(
Chris@82 255 X(mktensor_1d)(n, p->sz->dims[0].is/2, 1),
Chris@82 256 X(mktensor_1d)(nbuf, ivs, bufdist),
Chris@82 257 TAINT(p->r0, ivs * nbuf), bufs, &p->kind),
Chris@82 258 0, 0, (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
Chris@82 259 if (!cld) goto nada;
Chris@82 260 X(ifree)(bufs); bufs = 0;
Chris@82 261
Chris@82 262 cldrest = X(mkplan_d)(plnr,
Chris@82 263 X(mkproblem_rdft2_d)(
Chris@82 264 X(tensor_copy)(p->sz),
Chris@82 265 X(mktensor_1d)(vl % nbuf, ivs, ovs),
Chris@82 266 p->r0 + id, p->r1 + id,
Chris@82 267 p->cr + od, p->ci + od,
Chris@82 268 p->kind));
Chris@82 269 if (!cldrest) goto nada;
Chris@82 270
Chris@82 271 pln = MKPLAN_RDFT2(P, &padt, apply_r2hc);
Chris@82 272 } else {
Chris@82 273 A(p->kind == HC2R);
Chris@82 274 cld = X(mkplan_f_d)(
Chris@82 275 plnr,
Chris@82 276 X(mkproblem_rdft_d)(
Chris@82 277 X(mktensor_1d)(n, 1, p->sz->dims[0].os/2),
Chris@82 278 X(mktensor_1d)(nbuf, bufdist, ovs),
Chris@82 279 bufs, TAINT(p->r0, ovs * nbuf), &p->kind),
Chris@82 280 0, 0, NO_DESTROY_INPUT); /* always ok to destroy bufs */
Chris@82 281 if (!cld) goto nada;
Chris@82 282 X(ifree)(bufs); bufs = 0;
Chris@82 283
Chris@82 284 cldrest = X(mkplan_d)(plnr,
Chris@82 285 X(mkproblem_rdft2_d)(
Chris@82 286 X(tensor_copy)(p->sz),
Chris@82 287 X(mktensor_1d)(vl % nbuf, ivs, ovs),
Chris@82 288 p->r0 + od, p->r1 + od,
Chris@82 289 p->cr + id, p->ci + id,
Chris@82 290 p->kind));
Chris@82 291 if (!cldrest) goto nada;
Chris@82 292 pln = MKPLAN_RDFT2(P, &padt, apply_hc2r);
Chris@82 293 }
Chris@82 294
Chris@82 295 pln->cld = cld;
Chris@82 296 pln->cldrest = cldrest;
Chris@82 297 pln->n = n;
Chris@82 298 pln->vl = vl;
Chris@82 299 pln->ivs = ivs;
Chris@82 300 pln->ovs = ovs;
Chris@82 301 X(rdft2_strides)(p->kind, &p->sz->dims[0], &rs, &pln->cs);
Chris@82 302 pln->nbuf = nbuf;
Chris@82 303 pln->bufdist = bufdist;
Chris@82 304
Chris@82 305 X(ops_madd)(vl / nbuf, &cld->ops, &cldrest->ops,
Chris@82 306 &pln->super.super.ops);
Chris@82 307 pln->super.super.ops.other += (p->kind == R2HC ? (n + 2) : n) * vl;
Chris@82 308
Chris@82 309 return &(pln->super.super);
Chris@82 310
Chris@82 311 nada:
Chris@82 312 X(ifree0)(bufs);
Chris@82 313 X(plan_destroy_internal)(cldrest);
Chris@82 314 X(plan_destroy_internal)(cld);
Chris@82 315 return (plan *) 0;
Chris@82 316 }
Chris@82 317
Chris@82 318 static solver *mksolver(void)
Chris@82 319 {
Chris@82 320 static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 };
Chris@82 321 S *slv = MKSOLVER(S, &sadt);
Chris@82 322 return &(slv->super);
Chris@82 323 }
Chris@82 324
Chris@82 325 void X(rdft2_rdft_register)(planner *p)
Chris@82 326 {
Chris@82 327 REGISTER_SOLVER(p, mksolver());
Chris@82 328 }