annotate src/fftw-3.3.8/rdft/rank0.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21
Chris@82 22 /* plans for rank-0 RDFTs (copy operations) */
Chris@82 23
Chris@82 24 #include "rdft/rdft.h"
Chris@82 25
Chris@82 26 #ifdef HAVE_STRING_H
Chris@82 27 #include <string.h> /* for memcpy() */
Chris@82 28 #endif
Chris@82 29
Chris@82 30 #define MAXRNK 32 /* FIXME: should malloc() */
Chris@82 31
Chris@82 32 typedef struct {
Chris@82 33 plan_rdft super;
Chris@82 34 INT vl;
Chris@82 35 int rnk;
Chris@82 36 iodim d[MAXRNK];
Chris@82 37 const char *nam;
Chris@82 38 } P;
Chris@82 39
Chris@82 40 typedef struct {
Chris@82 41 solver super;
Chris@82 42 rdftapply apply;
Chris@82 43 int (*applicable)(const P *pln, const problem_rdft *p);
Chris@82 44 const char *nam;
Chris@82 45 } S;
Chris@82 46
Chris@82 47 /* copy up to MAXRNK dimensions from problem into plan. If a
Chris@82 48 contiguous dimension exists, save its length in pln->vl */
Chris@82 49 static int fill_iodim(P *pln, const problem_rdft *p)
Chris@82 50 {
Chris@82 51 int i;
Chris@82 52 const tensor *vecsz = p->vecsz;
Chris@82 53
Chris@82 54 pln->vl = 1;
Chris@82 55 pln->rnk = 0;
Chris@82 56 for (i = 0; i < vecsz->rnk; ++i) {
Chris@82 57 /* extract contiguous dimensions */
Chris@82 58 if (pln->vl == 1 &&
Chris@82 59 vecsz->dims[i].is == 1 && vecsz->dims[i].os == 1)
Chris@82 60 pln->vl = vecsz->dims[i].n;
Chris@82 61 else if (pln->rnk == MAXRNK)
Chris@82 62 return 0;
Chris@82 63 else
Chris@82 64 pln->d[pln->rnk++] = vecsz->dims[i];
Chris@82 65 }
Chris@82 66
Chris@82 67 return 1;
Chris@82 68 }
Chris@82 69
Chris@82 70 /* generic higher-rank copy routine, calls cpy2d() to do the real work */
Chris@82 71 static void copy(const iodim *d, int rnk, INT vl,
Chris@82 72 R *I, R *O,
Chris@82 73 cpy2d_func cpy2d)
Chris@82 74 {
Chris@82 75 A(rnk >= 2);
Chris@82 76 if (rnk == 2)
Chris@82 77 cpy2d(I, O, d[0].n, d[0].is, d[0].os, d[1].n, d[1].is, d[1].os, vl);
Chris@82 78 else {
Chris@82 79 INT i;
Chris@82 80 for (i = 0; i < d[0].n; ++i, I += d[0].is, O += d[0].os)
Chris@82 81 copy(d + 1, rnk - 1, vl, I, O, cpy2d);
Chris@82 82 }
Chris@82 83 }
Chris@82 84
Chris@82 85 /* FIXME: should be more general */
Chris@82 86 static int transposep(const P *pln)
Chris@82 87 {
Chris@82 88 int i;
Chris@82 89
Chris@82 90 for (i = 0; i < pln->rnk - 2; ++i)
Chris@82 91 if (pln->d[i].is != pln->d[i].os)
Chris@82 92 return 0;
Chris@82 93
Chris@82 94 return (pln->d[i].n == pln->d[i+1].n &&
Chris@82 95 pln->d[i].is == pln->d[i+1].os &&
Chris@82 96 pln->d[i].os == pln->d[i+1].is);
Chris@82 97 }
Chris@82 98
Chris@82 99 /* generic higher-rank transpose routine, calls transpose2d() to do
Chris@82 100 * the real work */
Chris@82 101 static void transpose(const iodim *d, int rnk, INT vl,
Chris@82 102 R *I,
Chris@82 103 transpose_func transpose2d)
Chris@82 104 {
Chris@82 105 A(rnk >= 2);
Chris@82 106 if (rnk == 2)
Chris@82 107 transpose2d(I, d[0].n, d[0].is, d[0].os, vl);
Chris@82 108 else {
Chris@82 109 INT i;
Chris@82 110 for (i = 0; i < d[0].n; ++i, I += d[0].is)
Chris@82 111 transpose(d + 1, rnk - 1, vl, I, transpose2d);
Chris@82 112 }
Chris@82 113 }
Chris@82 114
Chris@82 115 /**************************************************************/
Chris@82 116 /* rank 0,1,2, out of place, iterative */
Chris@82 117 static void apply_iter(const plan *ego_, R *I, R *O)
Chris@82 118 {
Chris@82 119 const P *ego = (const P *) ego_;
Chris@82 120
Chris@82 121 switch (ego->rnk) {
Chris@82 122 case 0:
Chris@82 123 X(cpy1d)(I, O, ego->vl, 1, 1, 1);
Chris@82 124 break;
Chris@82 125 case 1:
Chris@82 126 X(cpy1d)(I, O,
Chris@82 127 ego->d[0].n, ego->d[0].is, ego->d[0].os,
Chris@82 128 ego->vl);
Chris@82 129 break;
Chris@82 130 default:
Chris@82 131 copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_ci));
Chris@82 132 break;
Chris@82 133 }
Chris@82 134 }
Chris@82 135
Chris@82 136 static int applicable_iter(const P *pln, const problem_rdft *p)
Chris@82 137 {
Chris@82 138 UNUSED(pln);
Chris@82 139 return (p->I != p->O);
Chris@82 140 }
Chris@82 141
Chris@82 142 /**************************************************************/
Chris@82 143 /* out of place, write contiguous output */
Chris@82 144 static void apply_cpy2dco(const plan *ego_, R *I, R *O)
Chris@82 145 {
Chris@82 146 const P *ego = (const P *) ego_;
Chris@82 147 copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_co));
Chris@82 148 }
Chris@82 149
Chris@82 150 static int applicable_cpy2dco(const P *pln, const problem_rdft *p)
Chris@82 151 {
Chris@82 152 int rnk = pln->rnk;
Chris@82 153 return (1
Chris@82 154 && p->I != p->O
Chris@82 155 && rnk >= 2
Chris@82 156
Chris@82 157 /* must not duplicate apply_iter */
Chris@82 158 && (X(iabs)(pln->d[rnk - 2].is) <= X(iabs)(pln->d[rnk - 1].is)
Chris@82 159 ||
Chris@82 160 X(iabs)(pln->d[rnk - 2].os) <= X(iabs)(pln->d[rnk - 1].os))
Chris@82 161 );
Chris@82 162 }
Chris@82 163
Chris@82 164 /**************************************************************/
Chris@82 165 /* out of place, tiled, no buffering */
Chris@82 166 static void apply_tiled(const plan *ego_, R *I, R *O)
Chris@82 167 {
Chris@82 168 const P *ego = (const P *) ego_;
Chris@82 169 copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_tiled));
Chris@82 170 }
Chris@82 171
Chris@82 172 static int applicable_tiled(const P *pln, const problem_rdft *p)
Chris@82 173 {
Chris@82 174 return (1
Chris@82 175 && p->I != p->O
Chris@82 176 && pln->rnk >= 2
Chris@82 177
Chris@82 178 /* somewhat arbitrary */
Chris@82 179 && X(compute_tilesz)(pln->vl, 1) > 4
Chris@82 180 );
Chris@82 181 }
Chris@82 182
Chris@82 183 /**************************************************************/
Chris@82 184 /* out of place, tiled, with buffer */
Chris@82 185 static void apply_tiledbuf(const plan *ego_, R *I, R *O)
Chris@82 186 {
Chris@82 187 const P *ego = (const P *) ego_;
Chris@82 188 copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_tiledbuf));
Chris@82 189 }
Chris@82 190
Chris@82 191 #define applicable_tiledbuf applicable_tiled
Chris@82 192
Chris@82 193 /**************************************************************/
Chris@82 194 /* rank 0, out of place, using memcpy */
Chris@82 195 static void apply_memcpy(const plan *ego_, R *I, R *O)
Chris@82 196 {
Chris@82 197 const P *ego = (const P *) ego_;
Chris@82 198
Chris@82 199 A(ego->rnk == 0);
Chris@82 200 memcpy(O, I, ego->vl * sizeof(R));
Chris@82 201 }
Chris@82 202
Chris@82 203 static int applicable_memcpy(const P *pln, const problem_rdft *p)
Chris@82 204 {
Chris@82 205 return (1
Chris@82 206 && p->I != p->O
Chris@82 207 && pln->rnk == 0
Chris@82 208 && pln->vl > 2 /* do not bother memcpy-ing complex numbers */
Chris@82 209 );
Chris@82 210 }
Chris@82 211
Chris@82 212 /**************************************************************/
Chris@82 213 /* rank > 0 vecloop, out of place, using memcpy (e.g. out-of-place
Chris@82 214 transposes of vl-tuples ... for large vl it should be more
Chris@82 215 efficient to use memcpy than the tiled stuff). */
Chris@82 216
Chris@82 217 static void memcpy_loop(size_t cpysz, int rnk, const iodim *d, R *I, R *O)
Chris@82 218 {
Chris@82 219 INT i, n = d->n, is = d->is, os = d->os;
Chris@82 220 if (rnk == 1)
Chris@82 221 for (i = 0; i < n; ++i, I += is, O += os)
Chris@82 222 memcpy(O, I, cpysz);
Chris@82 223 else {
Chris@82 224 --rnk; ++d;
Chris@82 225 for (i = 0; i < n; ++i, I += is, O += os)
Chris@82 226 memcpy_loop(cpysz, rnk, d, I, O);
Chris@82 227 }
Chris@82 228 }
Chris@82 229
Chris@82 230 static void apply_memcpy_loop(const plan *ego_, R *I, R *O)
Chris@82 231 {
Chris@82 232 const P *ego = (const P *) ego_;
Chris@82 233 memcpy_loop(ego->vl * sizeof(R), ego->rnk, ego->d, I, O);
Chris@82 234 }
Chris@82 235
Chris@82 236 static int applicable_memcpy_loop(const P *pln, const problem_rdft *p)
Chris@82 237 {
Chris@82 238 return (p->I != p->O
Chris@82 239 && pln->rnk > 0
Chris@82 240 && pln->vl > 2 /* do not bother memcpy-ing complex numbers */);
Chris@82 241 }
Chris@82 242
Chris@82 243 /**************************************************************/
Chris@82 244 /* rank 2, in place, square transpose, iterative */
Chris@82 245 static void apply_ip_sq(const plan *ego_, R *I, R *O)
Chris@82 246 {
Chris@82 247 const P *ego = (const P *) ego_;
Chris@82 248 UNUSED(O);
Chris@82 249 transpose(ego->d, ego->rnk, ego->vl, I, X(transpose));
Chris@82 250 }
Chris@82 251
Chris@82 252
Chris@82 253 static int applicable_ip_sq(const P *pln, const problem_rdft *p)
Chris@82 254 {
Chris@82 255 return (1
Chris@82 256 && p->I == p->O
Chris@82 257 && pln->rnk >= 2
Chris@82 258 && transposep(pln));
Chris@82 259 }
Chris@82 260
Chris@82 261 /**************************************************************/
Chris@82 262 /* rank 2, in place, square transpose, tiled */
Chris@82 263 static void apply_ip_sq_tiled(const plan *ego_, R *I, R *O)
Chris@82 264 {
Chris@82 265 const P *ego = (const P *) ego_;
Chris@82 266 UNUSED(O);
Chris@82 267 transpose(ego->d, ego->rnk, ego->vl, I, X(transpose_tiled));
Chris@82 268 }
Chris@82 269
Chris@82 270 static int applicable_ip_sq_tiled(const P *pln, const problem_rdft *p)
Chris@82 271 {
Chris@82 272 return (1
Chris@82 273 && applicable_ip_sq(pln, p)
Chris@82 274
Chris@82 275 /* somewhat arbitrary */
Chris@82 276 && X(compute_tilesz)(pln->vl, 2) > 4
Chris@82 277 );
Chris@82 278 }
Chris@82 279
Chris@82 280 /**************************************************************/
Chris@82 281 /* rank 2, in place, square transpose, tiled, buffered */
Chris@82 282 static void apply_ip_sq_tiledbuf(const plan *ego_, R *I, R *O)
Chris@82 283 {
Chris@82 284 const P *ego = (const P *) ego_;
Chris@82 285 UNUSED(O);
Chris@82 286 transpose(ego->d, ego->rnk, ego->vl, I, X(transpose_tiledbuf));
Chris@82 287 }
Chris@82 288
Chris@82 289 #define applicable_ip_sq_tiledbuf applicable_ip_sq_tiled
Chris@82 290
Chris@82 291 /**************************************************************/
Chris@82 292 static int applicable(const S *ego, const problem *p_)
Chris@82 293 {
Chris@82 294 const problem_rdft *p = (const problem_rdft *) p_;
Chris@82 295 P pln;
Chris@82 296 return (1
Chris@82 297 && p->sz->rnk == 0
Chris@82 298 && FINITE_RNK(p->vecsz->rnk)
Chris@82 299 && fill_iodim(&pln, p)
Chris@82 300 && ego->applicable(&pln, p)
Chris@82 301 );
Chris@82 302 }
Chris@82 303
Chris@82 304 static void print(const plan *ego_, printer *p)
Chris@82 305 {
Chris@82 306 const P *ego = (const P *) ego_;
Chris@82 307 int i;
Chris@82 308 p->print(p, "(%s/%D", ego->nam, ego->vl);
Chris@82 309 for (i = 0; i < ego->rnk; ++i)
Chris@82 310 p->print(p, "%v", ego->d[i].n);
Chris@82 311 p->print(p, ")");
Chris@82 312 }
Chris@82 313
Chris@82 314 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@82 315 {
Chris@82 316 const problem_rdft *p;
Chris@82 317 const S *ego = (const S *) ego_;
Chris@82 318 P *pln;
Chris@82 319 int retval;
Chris@82 320
Chris@82 321 static const plan_adt padt = {
Chris@82 322 X(rdft_solve), X(null_awake), print, X(plan_null_destroy)
Chris@82 323 };
Chris@82 324
Chris@82 325 UNUSED(plnr);
Chris@82 326
Chris@82 327 if (!applicable(ego, p_))
Chris@82 328 return (plan *) 0;
Chris@82 329
Chris@82 330 p = (const problem_rdft *) p_;
Chris@82 331 pln = MKPLAN_RDFT(P, &padt, ego->apply);
Chris@82 332
Chris@82 333 retval = fill_iodim(pln, p);
Chris@82 334 (void)retval; /* UNUSED unless DEBUG */
Chris@82 335 A(retval);
Chris@82 336 A(pln->vl > 0); /* because FINITE_RNK(p->vecsz->rnk) holds */
Chris@82 337 pln->nam = ego->nam;
Chris@82 338
Chris@82 339 /* X(tensor_sz)(p->vecsz) loads, X(tensor_sz)(p->vecsz) stores */
Chris@82 340 X(ops_other)(2 * X(tensor_sz)(p->vecsz), &pln->super.super.ops);
Chris@82 341 return &(pln->super.super);
Chris@82 342 }
Chris@82 343
Chris@82 344
Chris@82 345 void X(rdft_rank0_register)(planner *p)
Chris@82 346 {
Chris@82 347 unsigned i;
Chris@82 348 static struct {
Chris@82 349 rdftapply apply;
Chris@82 350 int (*applicable)(const P *, const problem_rdft *);
Chris@82 351 const char *nam;
Chris@82 352 } tab[] = {
Chris@82 353 { apply_memcpy, applicable_memcpy, "rdft-rank0-memcpy" },
Chris@82 354 { apply_memcpy_loop, applicable_memcpy_loop,
Chris@82 355 "rdft-rank0-memcpy-loop" },
Chris@82 356 { apply_iter, applicable_iter, "rdft-rank0-iter-ci" },
Chris@82 357 { apply_cpy2dco, applicable_cpy2dco, "rdft-rank0-iter-co" },
Chris@82 358 { apply_tiled, applicable_tiled, "rdft-rank0-tiled" },
Chris@82 359 { apply_tiledbuf, applicable_tiledbuf, "rdft-rank0-tiledbuf" },
Chris@82 360 { apply_ip_sq, applicable_ip_sq, "rdft-rank0-ip-sq" },
Chris@82 361 {
Chris@82 362 apply_ip_sq_tiled,
Chris@82 363 applicable_ip_sq_tiled,
Chris@82 364 "rdft-rank0-ip-sq-tiled"
Chris@82 365 },
Chris@82 366 {
Chris@82 367 apply_ip_sq_tiledbuf,
Chris@82 368 applicable_ip_sq_tiledbuf,
Chris@82 369 "rdft-rank0-ip-sq-tiledbuf"
Chris@82 370 },
Chris@82 371 };
Chris@82 372
Chris@82 373 for (i = 0; i < sizeof(tab) / sizeof(tab[0]); ++i) {
Chris@82 374 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
Chris@82 375 S *slv = MKSOLVER(S, &sadt);
Chris@82 376 slv->apply = tab[i].apply;
Chris@82 377 slv->applicable = tab[i].applicable;
Chris@82 378 slv->nam = tab[i].nam;
Chris@82 379 REGISTER_SOLVER(p, &(slv->super));
Chris@82 380 }
Chris@82 381 }