annotate src/fftw-3.3.3/rdft/hc2hc-generic.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* express a hc2hc problem in terms of rdft + multiplication by
Chris@10 22 twiddle factors */
Chris@10 23
Chris@10 24 #include "hc2hc.h"
Chris@10 25
Chris@10 26 typedef hc2hc_solver S;
Chris@10 27
Chris@10 28 typedef struct {
Chris@10 29 plan_hc2hc super;
Chris@10 30
Chris@10 31 INT r, m, s, vl, vs, mstart1, mcount1;
Chris@10 32 plan *cld0;
Chris@10 33 plan *cld;
Chris@10 34 twid *td;
Chris@10 35 } P;
Chris@10 36
Chris@10 37
Chris@10 38 /**************************************************************/
Chris@10 39 static void mktwiddle(P *ego, enum wakefulness wakefulness)
Chris@10 40 {
Chris@10 41 static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } };
Chris@10 42
Chris@10 43 /* note that R and M are swapped, to allow for sequential
Chris@10 44 access both to data and twiddles */
Chris@10 45 X(twiddle_awake)(wakefulness, &ego->td, tw,
Chris@10 46 ego->r * ego->m, ego->m, ego->r);
Chris@10 47 }
Chris@10 48
Chris@10 49 static void bytwiddle(const P *ego, R *IO, R sign)
Chris@10 50 {
Chris@10 51 INT i, j, k;
Chris@10 52 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
Chris@10 53 INT ms = m * s;
Chris@10 54 INT mstart1 = ego->mstart1, mcount1 = ego->mcount1;
Chris@10 55 INT wrem = 2 * ((m-1)/2 - mcount1);
Chris@10 56
Chris@10 57 for (i = 0; i < vl; ++i, IO += vs) {
Chris@10 58 const R *W = ego->td->W;
Chris@10 59
Chris@10 60 A(m % 2 == 1);
Chris@10 61 for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) {
Chris@10 62 /* pr := IO + (j + mstart1) * s + k * ms */
Chris@10 63 R *pr = IO + mstart1 * s + k * ms;
Chris@10 64
Chris@10 65 /* pi := IO + (m - j - mstart1) * s + k * ms */
Chris@10 66 R *pi = IO - mstart1 * s + (k + 1) * ms;
Chris@10 67
Chris@10 68 for (j = 0; j < mcount1; ++j, pr += s, pi -= s) {
Chris@10 69 E xr = *pr;
Chris@10 70 E xi = *pi;
Chris@10 71 E wr = W[0];
Chris@10 72 E wi = sign * W[1];
Chris@10 73 *pr = xr * wr - xi * wi;
Chris@10 74 *pi = xi * wr + xr * wi;
Chris@10 75 W += 2;
Chris@10 76 }
Chris@10 77 W += wrem;
Chris@10 78 }
Chris@10 79 }
Chris@10 80 }
Chris@10 81
Chris@10 82 static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend)
Chris@10 83 {
Chris@10 84 INT k;
Chris@10 85 INT ms = m * s;
Chris@10 86 INT js = jstart * s;
Chris@10 87 for (k = 0; k + k < r; ++k) {
Chris@10 88 /* pr := IO + (m - j) * s + k * ms */
Chris@10 89 R *pr = IO + (k + 1) * ms - js;
Chris@10 90 /* pi := IO + (m - j) * s + (r - 1 - k) * ms */
Chris@10 91 R *pi = IO + (r - k) * ms - js;
Chris@10 92 INT j;
Chris@10 93 for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) {
Chris@10 94 R t = *pr;
Chris@10 95 *pr = *pi;
Chris@10 96 *pi = t;
Chris@10 97 }
Chris@10 98 }
Chris@10 99 }
Chris@10 100
Chris@10 101 static void reorder_dit(const P *ego, R *IO)
Chris@10 102 {
Chris@10 103 INT i, k;
Chris@10 104 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
Chris@10 105 INT ms = m * s;
Chris@10 106 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
Chris@10 107
Chris@10 108 for (i = 0; i < vl; ++i, IO += vs) {
Chris@10 109 for (k = 1; k + k < r; ++k) {
Chris@10 110 R *p0 = IO + k * ms;
Chris@10 111 R *p1 = IO + (r - k) * ms;
Chris@10 112 INT j;
Chris@10 113
Chris@10 114 for (j = mstart1; j < mend1; ++j) {
Chris@10 115 E rp, ip, im, rm;
Chris@10 116 rp = p0[j * s];
Chris@10 117 im = p1[ms - j * s];
Chris@10 118 rm = p1[j * s];
Chris@10 119 ip = p0[ms - j * s];
Chris@10 120 p0[j * s] = rp - im;
Chris@10 121 p1[ms - j * s] = rp + im;
Chris@10 122 p1[j * s] = rm - ip;
Chris@10 123 p0[ms - j * s] = ip + rm;
Chris@10 124 }
Chris@10 125 }
Chris@10 126
Chris@10 127 swapri(IO, r, m, s, mstart1, mend1);
Chris@10 128 }
Chris@10 129 }
Chris@10 130
Chris@10 131 static void reorder_dif(const P *ego, R *IO)
Chris@10 132 {
Chris@10 133 INT i, k;
Chris@10 134 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
Chris@10 135 INT ms = m * s;
Chris@10 136 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
Chris@10 137
Chris@10 138 for (i = 0; i < vl; ++i, IO += vs) {
Chris@10 139 swapri(IO, r, m, s, mstart1, mend1);
Chris@10 140
Chris@10 141 for (k = 1; k + k < r; ++k) {
Chris@10 142 R *p0 = IO + k * ms;
Chris@10 143 R *p1 = IO + (r - k) * ms;
Chris@10 144 const R half = K(0.5);
Chris@10 145 INT j;
Chris@10 146
Chris@10 147 for (j = mstart1; j < mend1; ++j) {
Chris@10 148 E rp, ip, im, rm;
Chris@10 149 rp = half * p0[j * s];
Chris@10 150 im = half * p1[ms - j * s];
Chris@10 151 rm = half * p1[j * s];
Chris@10 152 ip = half * p0[ms - j * s];
Chris@10 153 p0[j * s] = rp + im;
Chris@10 154 p1[ms - j * s] = im - rp;
Chris@10 155 p1[j * s] = rm + ip;
Chris@10 156 p0[ms - j * s] = ip - rm;
Chris@10 157 }
Chris@10 158 }
Chris@10 159 }
Chris@10 160 }
Chris@10 161
Chris@10 162 static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr)
Chris@10 163 {
Chris@10 164 return (1
Chris@10 165 && (kind == R2HC || kind == HC2R)
Chris@10 166 && (m % 2)
Chris@10 167 && (r % 2)
Chris@10 168 && !NO_SLOWP(plnr)
Chris@10 169 );
Chris@10 170 }
Chris@10 171
Chris@10 172 /**************************************************************/
Chris@10 173
Chris@10 174 static void apply_dit(const plan *ego_, R *IO)
Chris@10 175 {
Chris@10 176 const P *ego = (const P *) ego_;
Chris@10 177 INT start;
Chris@10 178 plan_rdft *cld, *cld0;
Chris@10 179
Chris@10 180 bytwiddle(ego, IO, K(-1.0));
Chris@10 181
Chris@10 182 cld0 = (plan_rdft *) ego->cld0;
Chris@10 183 cld0->apply(ego->cld0, IO, IO);
Chris@10 184
Chris@10 185 start = ego->mstart1 * ego->s;
Chris@10 186 cld = (plan_rdft *) ego->cld;
Chris@10 187 cld->apply(ego->cld, IO + start, IO + start);
Chris@10 188
Chris@10 189 reorder_dit(ego, IO);
Chris@10 190 }
Chris@10 191
Chris@10 192 static void apply_dif(const plan *ego_, R *IO)
Chris@10 193 {
Chris@10 194 const P *ego = (const P *) ego_;
Chris@10 195 INT start;
Chris@10 196 plan_rdft *cld, *cld0;
Chris@10 197
Chris@10 198 reorder_dif(ego, IO);
Chris@10 199
Chris@10 200 cld0 = (plan_rdft *) ego->cld0;
Chris@10 201 cld0->apply(ego->cld0, IO, IO);
Chris@10 202
Chris@10 203 start = ego->mstart1 * ego->s;
Chris@10 204 cld = (plan_rdft *) ego->cld;
Chris@10 205 cld->apply(ego->cld, IO + start, IO + start);
Chris@10 206
Chris@10 207 bytwiddle(ego, IO, K(1.0));
Chris@10 208 }
Chris@10 209
Chris@10 210
Chris@10 211 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@10 212 {
Chris@10 213 P *ego = (P *) ego_;
Chris@10 214 X(plan_awake)(ego->cld0, wakefulness);
Chris@10 215 X(plan_awake)(ego->cld, wakefulness);
Chris@10 216 mktwiddle(ego, wakefulness);
Chris@10 217 }
Chris@10 218
Chris@10 219 static void destroy(plan *ego_)
Chris@10 220 {
Chris@10 221 P *ego = (P *) ego_;
Chris@10 222 X(plan_destroy_internal)(ego->cld);
Chris@10 223 X(plan_destroy_internal)(ego->cld0);
Chris@10 224 }
Chris@10 225
Chris@10 226 static void print(const plan *ego_, printer *p)
Chris@10 227 {
Chris@10 228 const P *ego = (const P *) ego_;
Chris@10 229 p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))",
Chris@10 230 ego->super.apply == apply_dit ? "dit" : "dif",
Chris@10 231 ego->r, ego->m, ego->vl, ego->cld0, ego->cld);
Chris@10 232 }
Chris@10 233
Chris@10 234 static plan *mkcldw(const hc2hc_solver *ego_,
Chris@10 235 rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs,
Chris@10 236 INT mstart, INT mcount,
Chris@10 237 R *IO, planner *plnr)
Chris@10 238 {
Chris@10 239 P *pln;
Chris@10 240 plan *cld0 = 0, *cld = 0;
Chris@10 241 INT mstart1, mcount1, mstride;
Chris@10 242
Chris@10 243 static const plan_adt padt = {
Chris@10 244 0, awake, print, destroy
Chris@10 245 };
Chris@10 246
Chris@10 247 UNUSED(ego_);
Chris@10 248
Chris@10 249 A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2);
Chris@10 250
Chris@10 251 if (!applicable(kind, r, m, plnr))
Chris@10 252 return (plan *)0;
Chris@10 253
Chris@10 254 A(m % 2);
Chris@10 255 mstart1 = mstart + (mstart == 0);
Chris@10 256 mcount1 = mcount - (mstart == 0);
Chris@10 257 mstride = m - (mstart + mcount - 1) - mstart1;
Chris@10 258
Chris@10 259 /* 0th (DC) transform (vl of these), if mstart == 0 */
Chris@10 260 cld0 = X(mkplan_d)(plnr,
Chris@10 261 X(mkproblem_rdft_1_d)(
Chris@10 262 mstart == 0 ? X(mktensor_1d)(r, m * s, m * s)
Chris@10 263 : X(mktensor_0d)(),
Chris@10 264 X(mktensor_1d)(vl, vs, vs),
Chris@10 265 IO, IO, kind)
Chris@10 266 );
Chris@10 267 if (!cld0) goto nada;
Chris@10 268
Chris@10 269 /* twiddle transforms: there are 2 x mcount1 x vl of these
Chris@10 270 (where 2 corresponds to the real and imaginary parts) ...
Chris@10 271 the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */
Chris@10 272 cld = X(mkplan_d)(plnr,
Chris@10 273 X(mkproblem_rdft_1_d)(
Chris@10 274 X(mktensor_1d)(r, m * s, m * s),
Chris@10 275 X(mktensor_3d)(2, mstride * s, mstride * s,
Chris@10 276 mcount1, s, s,
Chris@10 277 vl, vs, vs),
Chris@10 278 IO + s * mstart1, IO + s * mstart1, kind)
Chris@10 279 );
Chris@10 280 if (!cld) goto nada;
Chris@10 281
Chris@10 282 pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif);
Chris@10 283 pln->cld = cld;
Chris@10 284 pln->cld0 = cld0;
Chris@10 285 pln->r = r;
Chris@10 286 pln->m = m;
Chris@10 287 pln->s = s;
Chris@10 288 pln->vl = vl;
Chris@10 289 pln->vs = vs;
Chris@10 290 pln->td = 0;
Chris@10 291 pln->mstart1 = mstart1;
Chris@10 292 pln->mcount1 = mcount1;
Chris@10 293
Chris@10 294 {
Chris@10 295 double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl;
Chris@10 296 pln->super.super.ops = cld->ops;
Chris@10 297 pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0;
Chris@10 298 pln->super.super.ops.add += 4.0 * n0;
Chris@10 299 pln->super.super.ops.other += 11.0 * n0;
Chris@10 300 }
Chris@10 301 return &(pln->super.super);
Chris@10 302
Chris@10 303 nada:
Chris@10 304 X(plan_destroy_internal)(cld);
Chris@10 305 X(plan_destroy_internal)(cld0);
Chris@10 306 return (plan *) 0;
Chris@10 307 }
Chris@10 308
Chris@10 309 static void regsolver(planner *plnr, INT r)
Chris@10 310 {
Chris@10 311 S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw);
Chris@10 312 REGISTER_SOLVER(plnr, &(slv->super));
Chris@10 313 if (X(mksolver_hc2hc_hook)) {
Chris@10 314 slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw);
Chris@10 315 REGISTER_SOLVER(plnr, &(slv->super));
Chris@10 316 }
Chris@10 317 }
Chris@10 318
Chris@10 319 void X(hc2hc_generic_register)(planner *p)
Chris@10 320 {
Chris@10 321 regsolver(p, 0);
Chris@10 322 }