annotate src/fftw-3.3.3/rdft/direct-r2c.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21
Chris@10 22 /* direct RDFT solver, using r2c codelets */
Chris@10 23
Chris@10 24 #include "rdft.h"
Chris@10 25
Chris@10 26 typedef struct {
Chris@10 27 solver super;
Chris@10 28 const kr2c_desc *desc;
Chris@10 29 kr2c k;
Chris@10 30 int bufferedp;
Chris@10 31 } S;
Chris@10 32
Chris@10 33 typedef struct {
Chris@10 34 plan_rdft super;
Chris@10 35
Chris@10 36 stride rs, csr, csi;
Chris@10 37 stride brs, bcsr, bcsi;
Chris@10 38 INT n, vl, rs0, ivs, ovs, ioffset, bioffset;
Chris@10 39 kr2c k;
Chris@10 40 const S *slv;
Chris@10 41 } P;
Chris@10 42
Chris@10 43 /*************************************************************
Chris@10 44 Nonbuffered code
Chris@10 45 *************************************************************/
Chris@10 46 static void apply_r2hc(const plan *ego_, R *I, R *O)
Chris@10 47 {
Chris@10 48 const P *ego = (const P *) ego_;
Chris@10 49 ASSERT_ALIGNED_DOUBLE;
Chris@10 50 ego->k(I, I + ego->rs0, O, O + ego->ioffset,
Chris@10 51 ego->rs, ego->csr, ego->csi,
Chris@10 52 ego->vl, ego->ivs, ego->ovs);
Chris@10 53 }
Chris@10 54
Chris@10 55 static void apply_hc2r(const plan *ego_, R *I, R *O)
Chris@10 56 {
Chris@10 57 const P *ego = (const P *) ego_;
Chris@10 58 ASSERT_ALIGNED_DOUBLE;
Chris@10 59 ego->k(O, O + ego->rs0, I, I + ego->ioffset,
Chris@10 60 ego->rs, ego->csr, ego->csi,
Chris@10 61 ego->vl, ego->ivs, ego->ovs);
Chris@10 62 }
Chris@10 63
Chris@10 64 /*************************************************************
Chris@10 65 Buffered code
Chris@10 66 *************************************************************/
Chris@10 67 /* should not be 2^k to avoid associativity conflicts */
Chris@10 68 static INT compute_batchsize(INT radix)
Chris@10 69 {
Chris@10 70 /* round up to multiple of 4 */
Chris@10 71 radix += 3;
Chris@10 72 radix &= -4;
Chris@10 73
Chris@10 74 return (radix + 2);
Chris@10 75 }
Chris@10 76
Chris@10 77 static void dobatch_r2hc(const P *ego, R *I, R *O, R *buf, INT batchsz)
Chris@10 78 {
Chris@10 79 X(cpy2d_ci)(I, buf,
Chris@10 80 ego->n, ego->rs0, WS(ego->bcsr /* hack */, 1),
Chris@10 81 batchsz, ego->ivs, 1, 1);
Chris@10 82
Chris@10 83 if (IABS(WS(ego->csr, 1)) < IABS(ego->ovs)) {
Chris@10 84 /* transform directly to output */
Chris@10 85 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1),
Chris@10 86 O, O + ego->ioffset,
Chris@10 87 ego->brs, ego->csr, ego->csi,
Chris@10 88 batchsz, 1, ego->ovs);
Chris@10 89 } else {
Chris@10 90 /* transform to buffer and copy back */
Chris@10 91 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1),
Chris@10 92 buf, buf + ego->bioffset,
Chris@10 93 ego->brs, ego->bcsr, ego->bcsi,
Chris@10 94 batchsz, 1, 1);
Chris@10 95 X(cpy2d_co)(buf, O,
Chris@10 96 ego->n, WS(ego->bcsr, 1), WS(ego->csr, 1),
Chris@10 97 batchsz, 1, ego->ovs, 1);
Chris@10 98 }
Chris@10 99 }
Chris@10 100
Chris@10 101 static void dobatch_hc2r(const P *ego, R *I, R *O, R *buf, INT batchsz)
Chris@10 102 {
Chris@10 103 if (IABS(WS(ego->csr, 1)) < IABS(ego->ivs)) {
Chris@10 104 /* transform directly from input */
Chris@10 105 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1),
Chris@10 106 I, I + ego->ioffset,
Chris@10 107 ego->brs, ego->csr, ego->csi,
Chris@10 108 batchsz, ego->ivs, 1);
Chris@10 109 } else {
Chris@10 110 /* copy into buffer and transform in place */
Chris@10 111 X(cpy2d_ci)(I, buf,
Chris@10 112 ego->n, WS(ego->csr, 1), WS(ego->bcsr, 1),
Chris@10 113 batchsz, ego->ivs, 1, 1);
Chris@10 114 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1),
Chris@10 115 buf, buf + ego->bioffset,
Chris@10 116 ego->brs, ego->bcsr, ego->bcsi,
Chris@10 117 batchsz, 1, 1);
Chris@10 118 }
Chris@10 119 X(cpy2d_co)(buf, O,
Chris@10 120 ego->n, WS(ego->bcsr /* hack */, 1), ego->rs0,
Chris@10 121 batchsz, 1, ego->ovs, 1);
Chris@10 122 }
Chris@10 123
Chris@10 124 static void iterate(const P *ego, R *I, R *O,
Chris@10 125 void (*dobatch)(const P *ego, R *I, R *O,
Chris@10 126 R *buf, INT batchsz))
Chris@10 127 {
Chris@10 128 R *buf;
Chris@10 129 INT vl = ego->vl;
Chris@10 130 INT n = ego->n;
Chris@10 131 INT i;
Chris@10 132 INT batchsz = compute_batchsize(n);
Chris@10 133 size_t bufsz = n * batchsz * sizeof(R);
Chris@10 134
Chris@10 135 BUF_ALLOC(R *, buf, bufsz);
Chris@10 136
Chris@10 137 for (i = 0; i < vl - batchsz; i += batchsz) {
Chris@10 138 dobatch(ego, I, O, buf, batchsz);
Chris@10 139 I += batchsz * ego->ivs;
Chris@10 140 O += batchsz * ego->ovs;
Chris@10 141 }
Chris@10 142 dobatch(ego, I, O, buf, vl - i);
Chris@10 143
Chris@10 144 BUF_FREE(buf, bufsz);
Chris@10 145 }
Chris@10 146
Chris@10 147 static void apply_buf_r2hc(const plan *ego_, R *I, R *O)
Chris@10 148 {
Chris@10 149 iterate((const P *) ego_, I, O, dobatch_r2hc);
Chris@10 150 }
Chris@10 151
Chris@10 152 static void apply_buf_hc2r(const plan *ego_, R *I, R *O)
Chris@10 153 {
Chris@10 154 iterate((const P *) ego_, I, O, dobatch_hc2r);
Chris@10 155 }
Chris@10 156
Chris@10 157 static void destroy(plan *ego_)
Chris@10 158 {
Chris@10 159 P *ego = (P *) ego_;
Chris@10 160 X(stride_destroy)(ego->rs);
Chris@10 161 X(stride_destroy)(ego->csr);
Chris@10 162 X(stride_destroy)(ego->csi);
Chris@10 163 X(stride_destroy)(ego->brs);
Chris@10 164 X(stride_destroy)(ego->bcsr);
Chris@10 165 X(stride_destroy)(ego->bcsi);
Chris@10 166 }
Chris@10 167
Chris@10 168 static void print(const plan *ego_, printer *p)
Chris@10 169 {
Chris@10 170 const P *ego = (const P *) ego_;
Chris@10 171 const S *s = ego->slv;
Chris@10 172
Chris@10 173 if (ego->slv->bufferedp)
Chris@10 174 p->print(p, "(rdft-%s-directbuf/%D-r2c-%D%v \"%s\")",
Chris@10 175 X(rdft_kind_str)(s->desc->genus->kind),
Chris@10 176 /* hack */ WS(ego->bcsr, 1), ego->n,
Chris@10 177 ego->vl, s->desc->nam);
Chris@10 178
Chris@10 179 else
Chris@10 180 p->print(p, "(rdft-%s-direct-r2c-%D%v \"%s\")",
Chris@10 181 X(rdft_kind_str)(s->desc->genus->kind), ego->n,
Chris@10 182 ego->vl, s->desc->nam);
Chris@10 183 }
Chris@10 184
Chris@10 185 static INT ioffset(rdft_kind kind, INT sz, INT s)
Chris@10 186 {
Chris@10 187 return(s * ((kind == R2HC || kind == HC2R) ? sz : (sz - 1)));
Chris@10 188 }
Chris@10 189
Chris@10 190 static int applicable(const solver *ego_, const problem *p_)
Chris@10 191 {
Chris@10 192 const S *ego = (const S *) ego_;
Chris@10 193 const kr2c_desc *desc = ego->desc;
Chris@10 194 const problem_rdft *p = (const problem_rdft *) p_;
Chris@10 195 INT vl, ivs, ovs;
Chris@10 196
Chris@10 197 return (
Chris@10 198 1
Chris@10 199 && p->sz->rnk == 1
Chris@10 200 && p->vecsz->rnk <= 1
Chris@10 201 && p->sz->dims[0].n == desc->n
Chris@10 202 && p->kind[0] == desc->genus->kind
Chris@10 203
Chris@10 204 /* check strides etc */
Chris@10 205 && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
Chris@10 206
Chris@10 207 && (0
Chris@10 208 /* can operate out-of-place */
Chris@10 209 || p->I != p->O
Chris@10 210
Chris@10 211 /* computing one transform */
Chris@10 212 || vl == 1
Chris@10 213
Chris@10 214 /* can operate in-place as long as strides are the same */
Chris@10 215 || X(tensor_inplace_strides2)(p->sz, p->vecsz)
Chris@10 216 )
Chris@10 217 );
Chris@10 218 }
Chris@10 219
Chris@10 220 static int applicable_buf(const solver *ego_, const problem *p_)
Chris@10 221 {
Chris@10 222 const S *ego = (const S *) ego_;
Chris@10 223 const kr2c_desc *desc = ego->desc;
Chris@10 224 const problem_rdft *p = (const problem_rdft *) p_;
Chris@10 225 INT vl, ivs, ovs, batchsz;
Chris@10 226
Chris@10 227 return (
Chris@10 228 1
Chris@10 229 && p->sz->rnk == 1
Chris@10 230 && p->vecsz->rnk <= 1
Chris@10 231 && p->sz->dims[0].n == desc->n
Chris@10 232 && p->kind[0] == desc->genus->kind
Chris@10 233
Chris@10 234 /* check strides etc */
Chris@10 235 && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
Chris@10 236
Chris@10 237 && (batchsz = compute_batchsize(desc->n), 1)
Chris@10 238
Chris@10 239 && (0
Chris@10 240 /* can operate out-of-place */
Chris@10 241 || p->I != p->O
Chris@10 242
Chris@10 243 /* can operate in-place as long as strides are the same */
Chris@10 244 || X(tensor_inplace_strides2)(p->sz, p->vecsz)
Chris@10 245
Chris@10 246 /* can do it if the problem fits in the buffer, no matter
Chris@10 247 what the strides are */
Chris@10 248 || vl <= batchsz
Chris@10 249 )
Chris@10 250 );
Chris@10 251 }
Chris@10 252
Chris@10 253 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@10 254 {
Chris@10 255 const S *ego = (const S *) ego_;
Chris@10 256 P *pln;
Chris@10 257 const problem_rdft *p;
Chris@10 258 iodim *d;
Chris@10 259 INT rs, cs, b, n;
Chris@10 260
Chris@10 261 static const plan_adt padt = {
Chris@10 262 X(rdft_solve), X(null_awake), print, destroy
Chris@10 263 };
Chris@10 264
Chris@10 265 UNUSED(plnr);
Chris@10 266
Chris@10 267 if (ego->bufferedp) {
Chris@10 268 if (!applicable_buf(ego_, p_))
Chris@10 269 return (plan *)0;
Chris@10 270 } else {
Chris@10 271 if (!applicable(ego_, p_))
Chris@10 272 return (plan *)0;
Chris@10 273 }
Chris@10 274
Chris@10 275 p = (const problem_rdft *) p_;
Chris@10 276
Chris@10 277 if (R2HC_KINDP(p->kind[0])) {
Chris@10 278 rs = p->sz->dims[0].is; cs = p->sz->dims[0].os;
Chris@10 279 pln = MKPLAN_RDFT(P, &padt,
Chris@10 280 ego->bufferedp ? apply_buf_r2hc : apply_r2hc);
Chris@10 281 } else {
Chris@10 282 rs = p->sz->dims[0].os; cs = p->sz->dims[0].is;
Chris@10 283 pln = MKPLAN_RDFT(P, &padt,
Chris@10 284 ego->bufferedp ? apply_buf_hc2r : apply_hc2r);
Chris@10 285 }
Chris@10 286
Chris@10 287 d = p->sz->dims;
Chris@10 288 n = d[0].n;
Chris@10 289
Chris@10 290 pln->k = ego->k;
Chris@10 291 pln->n = n;
Chris@10 292
Chris@10 293 pln->rs0 = rs;
Chris@10 294 pln->rs = X(mkstride)(n, 2 * rs);
Chris@10 295 pln->csr = X(mkstride)(n, cs);
Chris@10 296 pln->csi = X(mkstride)(n, -cs);
Chris@10 297 pln->ioffset = ioffset(p->kind[0], n, cs);
Chris@10 298
Chris@10 299 b = compute_batchsize(n);
Chris@10 300 pln->brs = X(mkstride)(n, 2 * b);
Chris@10 301 pln->bcsr = X(mkstride)(n, b);
Chris@10 302 pln->bcsi = X(mkstride)(n, -b);
Chris@10 303 pln->bioffset = ioffset(p->kind[0], n, b);
Chris@10 304
Chris@10 305 X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
Chris@10 306
Chris@10 307 pln->slv = ego;
Chris@10 308 X(ops_zero)(&pln->super.super.ops);
Chris@10 309
Chris@10 310 X(ops_madd2)(pln->vl / ego->desc->genus->vl,
Chris@10 311 &ego->desc->ops,
Chris@10 312 &pln->super.super.ops);
Chris@10 313
Chris@10 314 if (ego->bufferedp)
Chris@10 315 pln->super.super.ops.other += 2 * n * pln->vl;
Chris@10 316
Chris@10 317 pln->super.super.could_prune_now_p = !ego->bufferedp;
Chris@10 318
Chris@10 319 return &(pln->super.super);
Chris@10 320 }
Chris@10 321
Chris@10 322 /* constructor */
Chris@10 323 static solver *mksolver(kr2c k, const kr2c_desc *desc, int bufferedp)
Chris@10 324 {
Chris@10 325 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
Chris@10 326 S *slv = MKSOLVER(S, &sadt);
Chris@10 327 slv->k = k;
Chris@10 328 slv->desc = desc;
Chris@10 329 slv->bufferedp = bufferedp;
Chris@10 330 return &(slv->super);
Chris@10 331 }
Chris@10 332
Chris@10 333 solver *X(mksolver_rdft_r2c_direct)(kr2c k, const kr2c_desc *desc)
Chris@10 334 {
Chris@10 335 return mksolver(k, desc, 0);
Chris@10 336 }
Chris@10 337
Chris@10 338 solver *X(mksolver_rdft_r2c_directbuf)(kr2c k, const kr2c_desc *desc)
Chris@10 339 {
Chris@10 340 return mksolver(k, desc, 1);
Chris@10 341 }