annotate src/fftw-3.3.5/libbench2/verify-r2r.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* Lots of ugly duplication from verify-lib.c, plus lots of ugliness in
Chris@42 22 general for all of the r2r variants...oh well, for now */
Chris@42 23
Chris@42 24 #include "verify.h"
Chris@42 25 #include <math.h>
Chris@42 26 #include <stdlib.h>
Chris@42 27 #include <stdio.h>
Chris@42 28
Chris@42 29 typedef struct {
Chris@42 30 bench_problem *p;
Chris@42 31 bench_tensor *probsz;
Chris@42 32 bench_tensor *totalsz;
Chris@42 33 bench_tensor *pckdsz;
Chris@42 34 bench_tensor *pckdvecsz;
Chris@42 35 } info;
Chris@42 36
Chris@42 37 /*
Chris@42 38 * Utility functions:
Chris@42 39 */
Chris@42 40
Chris@42 41 static double dabs(double x) { return (x < 0.0) ? -x : x; }
Chris@42 42 static double dmin(double x, double y) { return (x < y) ? x : y; }
Chris@42 43
Chris@42 44 static double raerror(R *a, R *b, int n)
Chris@42 45 {
Chris@42 46 if (n > 0) {
Chris@42 47 /* compute the relative Linf error */
Chris@42 48 double e = 0.0, mag = 0.0;
Chris@42 49 int i;
Chris@42 50
Chris@42 51 for (i = 0; i < n; ++i) {
Chris@42 52 e = dmax(e, dabs(a[i] - b[i]));
Chris@42 53 mag = dmax(mag, dmin(dabs(a[i]), dabs(b[i])));
Chris@42 54 }
Chris@42 55 if (dabs(mag) < 1e-14 && dabs(e) < 1e-14)
Chris@42 56 e = 0.0;
Chris@42 57 else
Chris@42 58 e /= mag;
Chris@42 59
Chris@42 60 #ifdef HAVE_ISNAN
Chris@42 61 BENCH_ASSERT(!isnan(e));
Chris@42 62 #endif
Chris@42 63 return e;
Chris@42 64 } else
Chris@42 65 return 0.0;
Chris@42 66 }
Chris@42 67
Chris@42 68 #define by2pi(m, n) ((K2PI * (m)) / (n))
Chris@42 69
Chris@42 70 /*
Chris@42 71 * Improve accuracy by reducing x to range [0..1/8]
Chris@42 72 * before multiplication by 2 * PI.
Chris@42 73 */
Chris@42 74
Chris@42 75 static trigreal bench_sincos(trigreal m, trigreal n, int sinp)
Chris@42 76 {
Chris@42 77 /* waiting for C to get tail recursion... */
Chris@42 78 trigreal half_n = n * 0.5;
Chris@42 79 trigreal quarter_n = half_n * 0.5;
Chris@42 80 trigreal eighth_n = quarter_n * 0.5;
Chris@42 81 trigreal sgn = 1.0;
Chris@42 82
Chris@42 83 if (sinp) goto sin;
Chris@42 84 cos:
Chris@42 85 if (m < 0) { m = -m; /* goto cos; */ }
Chris@42 86 if (m > half_n) { m = n - m; goto cos; }
Chris@42 87 if (m > eighth_n) { m = quarter_n - m; goto sin; }
Chris@42 88 return sgn * COS(by2pi(m, n));
Chris@42 89
Chris@42 90 msin:
Chris@42 91 sgn = -sgn;
Chris@42 92 sin:
Chris@42 93 if (m < 0) { m = -m; goto msin; }
Chris@42 94 if (m > half_n) { m = n - m; goto msin; }
Chris@42 95 if (m > eighth_n) { m = quarter_n - m; goto cos; }
Chris@42 96 return sgn * SIN(by2pi(m, n));
Chris@42 97 }
Chris@42 98
Chris@42 99 static trigreal cos2pi(int m, int n)
Chris@42 100 {
Chris@42 101 return bench_sincos((trigreal)m, (trigreal)n, 0);
Chris@42 102 }
Chris@42 103
Chris@42 104 static trigreal sin2pi(int m, int n)
Chris@42 105 {
Chris@42 106 return bench_sincos((trigreal)m, (trigreal)n, 1);
Chris@42 107 }
Chris@42 108
Chris@42 109 static trigreal cos00(int i, int j, int n)
Chris@42 110 {
Chris@42 111 return cos2pi(i * j, n);
Chris@42 112 }
Chris@42 113
Chris@42 114 static trigreal cos01(int i, int j, int n)
Chris@42 115 {
Chris@42 116 return cos00(i, 2*j + 1, 2*n);
Chris@42 117 }
Chris@42 118
Chris@42 119 static trigreal cos10(int i, int j, int n)
Chris@42 120 {
Chris@42 121 return cos00(2*i + 1, j, 2*n);
Chris@42 122 }
Chris@42 123
Chris@42 124 static trigreal cos11(int i, int j, int n)
Chris@42 125 {
Chris@42 126 return cos00(2*i + 1, 2*j + 1, 4*n);
Chris@42 127 }
Chris@42 128
Chris@42 129 static trigreal sin00(int i, int j, int n)
Chris@42 130 {
Chris@42 131 return sin2pi(i * j, n);
Chris@42 132 }
Chris@42 133
Chris@42 134 static trigreal sin01(int i, int j, int n)
Chris@42 135 {
Chris@42 136 return sin00(i, 2*j + 1, 2*n);
Chris@42 137 }
Chris@42 138
Chris@42 139 static trigreal sin10(int i, int j, int n)
Chris@42 140 {
Chris@42 141 return sin00(2*i + 1, j, 2*n);
Chris@42 142 }
Chris@42 143
Chris@42 144 static trigreal sin11(int i, int j, int n)
Chris@42 145 {
Chris@42 146 return sin00(2*i + 1, 2*j + 1, 4*n);
Chris@42 147 }
Chris@42 148
Chris@42 149 static trigreal realhalf(int i, int j, int n)
Chris@42 150 {
Chris@42 151 UNUSED(i);
Chris@42 152 if (j <= n - j)
Chris@42 153 return 1.0;
Chris@42 154 else
Chris@42 155 return 0.0;
Chris@42 156 }
Chris@42 157
Chris@42 158 static trigreal coshalf(int i, int j, int n)
Chris@42 159 {
Chris@42 160 if (j <= n - j)
Chris@42 161 return cos00(i, j, n);
Chris@42 162 else
Chris@42 163 return cos00(i, n - j, n);
Chris@42 164 }
Chris@42 165
Chris@42 166 static trigreal unity(int i, int j, int n)
Chris@42 167 {
Chris@42 168 UNUSED(i);
Chris@42 169 UNUSED(j);
Chris@42 170 UNUSED(n);
Chris@42 171 return 1.0;
Chris@42 172 }
Chris@42 173
Chris@42 174 typedef trigreal (*trigfun)(int, int, int);
Chris@42 175
Chris@42 176 static void rarand(R *a, int n)
Chris@42 177 {
Chris@42 178 int i;
Chris@42 179
Chris@42 180 /* generate random inputs */
Chris@42 181 for (i = 0; i < n; ++i) {
Chris@42 182 a[i] = mydrand();
Chris@42 183 }
Chris@42 184 }
Chris@42 185
Chris@42 186 /* C = A + B */
Chris@42 187 static void raadd(R *c, R *a, R *b, int n)
Chris@42 188 {
Chris@42 189 int i;
Chris@42 190
Chris@42 191 for (i = 0; i < n; ++i) {
Chris@42 192 c[i] = a[i] + b[i];
Chris@42 193 }
Chris@42 194 }
Chris@42 195
Chris@42 196 /* C = A - B */
Chris@42 197 static void rasub(R *c, R *a, R *b, int n)
Chris@42 198 {
Chris@42 199 int i;
Chris@42 200
Chris@42 201 for (i = 0; i < n; ++i) {
Chris@42 202 c[i] = a[i] - b[i];
Chris@42 203 }
Chris@42 204 }
Chris@42 205
Chris@42 206 /* B = rotate left A + rotate right A */
Chris@42 207 static void rarolr(R *b, R *a, int n, int nb, int na,
Chris@42 208 r2r_kind_t k)
Chris@42 209 {
Chris@42 210 int isL0 = 0, isL1 = 0, isR0 = 0, isR1 = 0;
Chris@42 211 int i, ib, ia;
Chris@42 212
Chris@42 213 for (ib = 0; ib < nb; ++ib) {
Chris@42 214 for (i = 0; i < n - 1; ++i)
Chris@42 215 for (ia = 0; ia < na; ++ia)
Chris@42 216 b[(ib * n + i) * na + ia] =
Chris@42 217 a[(ib * n + i + 1) * na + ia];
Chris@42 218
Chris@42 219 /* ugly switch to do boundary conditions for various r2r types */
Chris@42 220 switch (k) {
Chris@42 221 /* periodic boundaries */
Chris@42 222 case R2R_DHT:
Chris@42 223 case R2R_R2HC:
Chris@42 224 for (ia = 0; ia < na; ++ia) {
Chris@42 225 b[(ib * n + n - 1) * na + ia] =
Chris@42 226 a[(ib * n + 0) * na + ia];
Chris@42 227 b[(ib * n + 0) * na + ia] +=
Chris@42 228 a[(ib * n + n - 1) * na + ia];
Chris@42 229 }
Chris@42 230 break;
Chris@42 231
Chris@42 232 case R2R_HC2R: /* ugh (hermitian halfcomplex boundaries) */
Chris@42 233 if (n > 2) {
Chris@42 234 if (n % 2 == 0)
Chris@42 235 for (ia = 0; ia < na; ++ia) {
Chris@42 236 b[(ib * n + n - 1) * na + ia] = 0.0;
Chris@42 237 b[(ib * n + 0) * na + ia] +=
Chris@42 238 a[(ib * n + 1) * na + ia];
Chris@42 239 b[(ib * n + n/2) * na + ia] +=
Chris@42 240 + a[(ib * n + n/2 - 1) * na + ia]
Chris@42 241 - a[(ib * n + n/2 + 1) * na + ia];
Chris@42 242 b[(ib * n + n/2 + 1) * na + ia] +=
Chris@42 243 - a[(ib * n + n/2) * na + ia];
Chris@42 244 }
Chris@42 245 else
Chris@42 246 for (ia = 0; ia < na; ++ia) {
Chris@42 247 b[(ib * n + n - 1) * na + ia] = 0.0;
Chris@42 248 b[(ib * n + 0) * na + ia] +=
Chris@42 249 a[(ib * n + 1) * na + ia];
Chris@42 250 b[(ib * n + n/2) * na + ia] +=
Chris@42 251 + a[(ib * n + n/2) * na + ia]
Chris@42 252 - a[(ib * n + n/2 + 1) * na + ia];
Chris@42 253 b[(ib * n + n/2 + 1) * na + ia] +=
Chris@42 254 - a[(ib * n + n/2 + 1) * na + ia]
Chris@42 255 - a[(ib * n + n/2) * na + ia];
Chris@42 256 }
Chris@42 257 } else /* n <= 2 */ {
Chris@42 258 for (ia = 0; ia < na; ++ia) {
Chris@42 259 b[(ib * n + n - 1) * na + ia] =
Chris@42 260 a[(ib * n + 0) * na + ia];
Chris@42 261 b[(ib * n + 0) * na + ia] +=
Chris@42 262 a[(ib * n + n - 1) * na + ia];
Chris@42 263 }
Chris@42 264 }
Chris@42 265 break;
Chris@42 266
Chris@42 267 /* various even/odd boundary conditions */
Chris@42 268 case R2R_REDFT00:
Chris@42 269 isL1 = isR1 = 1;
Chris@42 270 goto mirrors;
Chris@42 271 case R2R_REDFT01:
Chris@42 272 isL1 = 1;
Chris@42 273 goto mirrors;
Chris@42 274 case R2R_REDFT10:
Chris@42 275 isL0 = isR0 = 1;
Chris@42 276 goto mirrors;
Chris@42 277 case R2R_REDFT11:
Chris@42 278 isL0 = 1;
Chris@42 279 isR0 = -1;
Chris@42 280 goto mirrors;
Chris@42 281 case R2R_RODFT00:
Chris@42 282 goto mirrors;
Chris@42 283 case R2R_RODFT01:
Chris@42 284 isR1 = 1;
Chris@42 285 goto mirrors;
Chris@42 286 case R2R_RODFT10:
Chris@42 287 isL0 = isR0 = -1;
Chris@42 288 goto mirrors;
Chris@42 289 case R2R_RODFT11:
Chris@42 290 isL0 = -1;
Chris@42 291 isR0 = 1;
Chris@42 292 goto mirrors;
Chris@42 293
Chris@42 294 mirrors:
Chris@42 295
Chris@42 296 for (ia = 0; ia < na; ++ia)
Chris@42 297 b[(ib * n + n - 1) * na + ia] =
Chris@42 298 isR0 * a[(ib * n + n - 1) * na + ia]
Chris@42 299 + (n > 1 ? isR1 * a[(ib * n + n - 2) * na + ia]
Chris@42 300 : 0);
Chris@42 301
Chris@42 302 for (ia = 0; ia < na; ++ia)
Chris@42 303 b[(ib * n) * na + ia] +=
Chris@42 304 isL0 * a[(ib * n) * na + ia]
Chris@42 305 + (n > 1 ? isL1 * a[(ib * n + 1) * na + ia] : 0);
Chris@42 306
Chris@42 307 }
Chris@42 308
Chris@42 309 for (i = 1; i < n; ++i)
Chris@42 310 for (ia = 0; ia < na; ++ia)
Chris@42 311 b[(ib * n + i) * na + ia] +=
Chris@42 312 a[(ib * n + i - 1) * na + ia];
Chris@42 313 }
Chris@42 314 }
Chris@42 315
Chris@42 316 static void raphase_shift(R *b, R *a, int n, int nb, int na,
Chris@42 317 int n0, int k0, trigfun t)
Chris@42 318 {
Chris@42 319 int j, jb, ja;
Chris@42 320
Chris@42 321 for (jb = 0; jb < nb; ++jb)
Chris@42 322 for (j = 0; j < n; ++j) {
Chris@42 323 trigreal c = 2.0 * t(1, j + k0, n0);
Chris@42 324
Chris@42 325 for (ja = 0; ja < na; ++ja) {
Chris@42 326 int k = (jb * n + j) * na + ja;
Chris@42 327 b[k] = a[k] * c;
Chris@42 328 }
Chris@42 329 }
Chris@42 330 }
Chris@42 331
Chris@42 332 /* A = alpha * A (real, in place) */
Chris@42 333 static void rascale(R *a, R alpha, int n)
Chris@42 334 {
Chris@42 335 int i;
Chris@42 336
Chris@42 337 for (i = 0; i < n; ++i) {
Chris@42 338 a[i] *= alpha;
Chris@42 339 }
Chris@42 340 }
Chris@42 341
Chris@42 342 /*
Chris@42 343 * compute rdft:
Chris@42 344 */
Chris@42 345
Chris@42 346 /* copy real A into real B, using output stride of A and input stride of B */
Chris@42 347 typedef struct {
Chris@42 348 dotens2_closure k;
Chris@42 349 R *ra;
Chris@42 350 R *rb;
Chris@42 351 } cpyr_closure;
Chris@42 352
Chris@42 353 static void cpyr0(dotens2_closure *k_,
Chris@42 354 int indxa, int ondxa, int indxb, int ondxb)
Chris@42 355 {
Chris@42 356 cpyr_closure *k = (cpyr_closure *)k_;
Chris@42 357 k->rb[indxb] = k->ra[ondxa];
Chris@42 358 UNUSED(indxa); UNUSED(ondxb);
Chris@42 359 }
Chris@42 360
Chris@42 361 static void cpyr(R *ra, bench_tensor *sza, R *rb, bench_tensor *szb)
Chris@42 362 {
Chris@42 363 cpyr_closure k;
Chris@42 364 k.k.apply = cpyr0;
Chris@42 365 k.ra = ra; k.rb = rb;
Chris@42 366 bench_dotens2(sza, szb, &k.k);
Chris@42 367 }
Chris@42 368
Chris@42 369 static void dofft(info *nfo, R *in, R *out)
Chris@42 370 {
Chris@42 371 cpyr(in, nfo->pckdsz, (R *) nfo->p->in, nfo->totalsz);
Chris@42 372 after_problem_rcopy_from(nfo->p, (bench_real *)nfo->p->in);
Chris@42 373 doit(1, nfo->p);
Chris@42 374 after_problem_rcopy_to(nfo->p, (bench_real *)nfo->p->out);
Chris@42 375 cpyr((R *) nfo->p->out, nfo->totalsz, out, nfo->pckdsz);
Chris@42 376 }
Chris@42 377
Chris@42 378 static double racmp(R *a, R *b, int n, const char *test, double tol)
Chris@42 379 {
Chris@42 380 double d = raerror(a, b, n);
Chris@42 381 if (d > tol) {
Chris@42 382 ovtpvt_err("Found relative error %e (%s)\n", d, test);
Chris@42 383 {
Chris@42 384 int i, N;
Chris@42 385 N = n > 300 && verbose <= 2 ? 300 : n;
Chris@42 386 for (i = 0; i < N; ++i)
Chris@42 387 ovtpvt_err("%8d %16.12f %16.12f\n", i,
Chris@42 388 (double) a[i],
Chris@42 389 (double) b[i]);
Chris@42 390 }
Chris@42 391 bench_exit(EXIT_FAILURE);
Chris@42 392 }
Chris@42 393 return d;
Chris@42 394 }
Chris@42 395
Chris@42 396 /***********************************************************************/
Chris@42 397
Chris@42 398 typedef struct {
Chris@42 399 int n; /* physical size */
Chris@42 400 int n0; /* "logical" transform size */
Chris@42 401 int i0, k0; /* shifts of input/output */
Chris@42 402 trigfun ti, ts; /* impulse/shift trig functions */
Chris@42 403 } dim_stuff;
Chris@42 404
Chris@42 405 static void impulse_response(int rnk, dim_stuff *d, R impulse_amp,
Chris@42 406 R *A, int N)
Chris@42 407 {
Chris@42 408 if (rnk == 0)
Chris@42 409 A[0] = impulse_amp;
Chris@42 410 else {
Chris@42 411 int i;
Chris@42 412 N /= d->n;
Chris@42 413 for (i = 0; i < d->n; ++i) {
Chris@42 414 impulse_response(rnk - 1, d + 1,
Chris@42 415 impulse_amp * d->ti(d->i0, d->k0 + i, d->n0),
Chris@42 416 A + i * N, N);
Chris@42 417 }
Chris@42 418 }
Chris@42 419 }
Chris@42 420
Chris@42 421 /***************************************************************************/
Chris@42 422
Chris@42 423 /*
Chris@42 424 * Implementation of the FFT tester described in
Chris@42 425 *
Chris@42 426 * Funda Ergün. Testing multivariate linear functions: Overcoming the
Chris@42 427 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
Chris@42 428 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
Chris@42 429 * Nevada, 29 May--1 June 1995.
Chris@42 430 *
Chris@42 431 * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without
Chris@42 432 * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000).
Chris@42 433 */
Chris@42 434
Chris@42 435 static double rlinear(int n, info *nfo, R *inA, R *inB, R *inC, R *outA,
Chris@42 436 R *outB, R *outC, R *tmp, int rounds, double tol)
Chris@42 437 {
Chris@42 438 double e = 0.0;
Chris@42 439 int j;
Chris@42 440
Chris@42 441 for (j = 0; j < rounds; ++j) {
Chris@42 442 R alpha, beta;
Chris@42 443 alpha = mydrand();
Chris@42 444 beta = mydrand();
Chris@42 445 rarand(inA, n);
Chris@42 446 rarand(inB, n);
Chris@42 447 dofft(nfo, inA, outA);
Chris@42 448 dofft(nfo, inB, outB);
Chris@42 449
Chris@42 450 rascale(outA, alpha, n);
Chris@42 451 rascale(outB, beta, n);
Chris@42 452 raadd(tmp, outA, outB, n);
Chris@42 453 rascale(inA, alpha, n);
Chris@42 454 rascale(inB, beta, n);
Chris@42 455 raadd(inC, inA, inB, n);
Chris@42 456 dofft(nfo, inC, outC);
Chris@42 457
Chris@42 458 e = dmax(e, racmp(outC, tmp, n, "linear", tol));
Chris@42 459 }
Chris@42 460 return e;
Chris@42 461 }
Chris@42 462
Chris@42 463 static double rimpulse(dim_stuff *d, R impulse_amp,
Chris@42 464 int n, int vecn, info *nfo,
Chris@42 465 R *inA, R *inB, R *inC,
Chris@42 466 R *outA, R *outB, R *outC,
Chris@42 467 R *tmp, int rounds, double tol)
Chris@42 468 {
Chris@42 469 double e = 0.0;
Chris@42 470 int N = n * vecn;
Chris@42 471 int i;
Chris@42 472 int j;
Chris@42 473
Chris@42 474 /* test 2: check that the unit impulse is transformed properly */
Chris@42 475
Chris@42 476 for (i = 0; i < N; ++i) {
Chris@42 477 /* pls */
Chris@42 478 inA[i] = 0.0;
Chris@42 479 }
Chris@42 480 for (i = 0; i < vecn; ++i) {
Chris@42 481 inA[i * n] = (i+1) / (double)(vecn+1);
Chris@42 482
Chris@42 483 /* transform of the pls */
Chris@42 484 impulse_response(nfo->probsz->rnk, d, impulse_amp * inA[i * n],
Chris@42 485 outA + i * n, n);
Chris@42 486 }
Chris@42 487
Chris@42 488 dofft(nfo, inA, tmp);
Chris@42 489 e = dmax(e, racmp(tmp, outA, N, "impulse 1", tol));
Chris@42 490
Chris@42 491 for (j = 0; j < rounds; ++j) {
Chris@42 492 rarand(inB, N);
Chris@42 493 rasub(inC, inA, inB, N);
Chris@42 494 dofft(nfo, inB, outB);
Chris@42 495 dofft(nfo, inC, outC);
Chris@42 496 raadd(tmp, outB, outC, N);
Chris@42 497 e = dmax(e, racmp(tmp, outA, N, "impulse", tol));
Chris@42 498 }
Chris@42 499 return e;
Chris@42 500 }
Chris@42 501
Chris@42 502 static double t_shift(int n, int vecn, info *nfo,
Chris@42 503 R *inA, R *inB, R *outA, R *outB, R *tmp,
Chris@42 504 int rounds, double tol,
Chris@42 505 dim_stuff *d)
Chris@42 506 {
Chris@42 507 double e = 0.0;
Chris@42 508 int nb, na, dim, N = n * vecn;
Chris@42 509 int i, j;
Chris@42 510 bench_tensor *sz = nfo->probsz;
Chris@42 511
Chris@42 512 /* test 3: check the time-shift property */
Chris@42 513 /* the paper performs more tests, but this code should be fine too */
Chris@42 514
Chris@42 515 nb = 1;
Chris@42 516 na = n;
Chris@42 517
Chris@42 518 /* check shifts across all SZ dimensions */
Chris@42 519 for (dim = 0; dim < sz->rnk; ++dim) {
Chris@42 520 int ncur = sz->dims[dim].n;
Chris@42 521
Chris@42 522 na /= ncur;
Chris@42 523
Chris@42 524 for (j = 0; j < rounds; ++j) {
Chris@42 525 rarand(inA, N);
Chris@42 526
Chris@42 527 for (i = 0; i < vecn; ++i) {
Chris@42 528 rarolr(inB + i * n, inA + i*n, ncur, nb,na,
Chris@42 529 nfo->p->k[dim]);
Chris@42 530 }
Chris@42 531 dofft(nfo, inA, outA);
Chris@42 532 dofft(nfo, inB, outB);
Chris@42 533 for (i = 0; i < vecn; ++i)
Chris@42 534 raphase_shift(tmp + i * n, outA + i * n, ncur,
Chris@42 535 nb, na, d[dim].n0, d[dim].k0, d[dim].ts);
Chris@42 536 e = dmax(e, racmp(tmp, outB, N, "time shift", tol));
Chris@42 537 }
Chris@42 538
Chris@42 539 nb *= ncur;
Chris@42 540 }
Chris@42 541 return e;
Chris@42 542 }
Chris@42 543
Chris@42 544 /***********************************************************************/
Chris@42 545
Chris@42 546 void verify_r2r(bench_problem *p, int rounds, double tol, errors *e)
Chris@42 547 {
Chris@42 548 R *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
Chris@42 549 info nfo;
Chris@42 550 int n, vecn, N;
Chris@42 551 double impulse_amp = 1.0;
Chris@42 552 dim_stuff *d;
Chris@42 553 int i;
Chris@42 554
Chris@42 555 if (rounds == 0)
Chris@42 556 rounds = 20; /* default value */
Chris@42 557
Chris@42 558 n = tensor_sz(p->sz);
Chris@42 559 vecn = tensor_sz(p->vecsz);
Chris@42 560 N = n * vecn;
Chris@42 561
Chris@42 562 d = (dim_stuff *) bench_malloc(sizeof(dim_stuff) * p->sz->rnk);
Chris@42 563 for (i = 0; i < p->sz->rnk; ++i) {
Chris@42 564 int n0, i0, k0;
Chris@42 565 trigfun ti, ts;
Chris@42 566
Chris@42 567 d[i].n = n0 = p->sz->dims[i].n;
Chris@42 568 if (p->k[i] > R2R_DHT)
Chris@42 569 n0 = 2 * (n0 + (p->k[i] == R2R_REDFT00 ? -1 :
Chris@42 570 (p->k[i] == R2R_RODFT00 ? 1 : 0)));
Chris@42 571
Chris@42 572 switch (p->k[i]) {
Chris@42 573 case R2R_R2HC:
Chris@42 574 i0 = k0 = 0;
Chris@42 575 ti = realhalf;
Chris@42 576 ts = coshalf;
Chris@42 577 break;
Chris@42 578 case R2R_DHT:
Chris@42 579 i0 = k0 = 0;
Chris@42 580 ti = unity;
Chris@42 581 ts = cos00;
Chris@42 582 break;
Chris@42 583 case R2R_HC2R:
Chris@42 584 i0 = k0 = 0;
Chris@42 585 ti = unity;
Chris@42 586 ts = cos00;
Chris@42 587 break;
Chris@42 588 case R2R_REDFT00:
Chris@42 589 i0 = k0 = 0;
Chris@42 590 ti = ts = cos00;
Chris@42 591 break;
Chris@42 592 case R2R_REDFT01:
Chris@42 593 i0 = k0 = 0;
Chris@42 594 ti = ts = cos01;
Chris@42 595 break;
Chris@42 596 case R2R_REDFT10:
Chris@42 597 i0 = k0 = 0;
Chris@42 598 ti = cos10; impulse_amp *= 2.0;
Chris@42 599 ts = cos00;
Chris@42 600 break;
Chris@42 601 case R2R_REDFT11:
Chris@42 602 i0 = k0 = 0;
Chris@42 603 ti = cos11; impulse_amp *= 2.0;
Chris@42 604 ts = cos01;
Chris@42 605 break;
Chris@42 606 case R2R_RODFT00:
Chris@42 607 i0 = k0 = 1;
Chris@42 608 ti = sin00; impulse_amp *= 2.0;
Chris@42 609 ts = cos00;
Chris@42 610 break;
Chris@42 611 case R2R_RODFT01:
Chris@42 612 i0 = 1; k0 = 0;
Chris@42 613 ti = sin01; impulse_amp *= n == 1 ? 1.0 : 2.0;
Chris@42 614 ts = cos01;
Chris@42 615 break;
Chris@42 616 case R2R_RODFT10:
Chris@42 617 i0 = 0; k0 = 1;
Chris@42 618 ti = sin10; impulse_amp *= 2.0;
Chris@42 619 ts = cos00;
Chris@42 620 break;
Chris@42 621 case R2R_RODFT11:
Chris@42 622 i0 = k0 = 0;
Chris@42 623 ti = sin11; impulse_amp *= 2.0;
Chris@42 624 ts = cos01;
Chris@42 625 break;
Chris@42 626 default:
Chris@42 627 BENCH_ASSERT(0);
Chris@42 628 return;
Chris@42 629 }
Chris@42 630
Chris@42 631 d[i].n0 = n0;
Chris@42 632 d[i].i0 = i0;
Chris@42 633 d[i].k0 = k0;
Chris@42 634 d[i].ti = ti;
Chris@42 635 d[i].ts = ts;
Chris@42 636 }
Chris@42 637
Chris@42 638
Chris@42 639 inA = (R *) bench_malloc(N * sizeof(R));
Chris@42 640 inB = (R *) bench_malloc(N * sizeof(R));
Chris@42 641 inC = (R *) bench_malloc(N * sizeof(R));
Chris@42 642 outA = (R *) bench_malloc(N * sizeof(R));
Chris@42 643 outB = (R *) bench_malloc(N * sizeof(R));
Chris@42 644 outC = (R *) bench_malloc(N * sizeof(R));
Chris@42 645 tmp = (R *) bench_malloc(N * sizeof(R));
Chris@42 646
Chris@42 647 nfo.p = p;
Chris@42 648 nfo.probsz = p->sz;
Chris@42 649 nfo.totalsz = tensor_append(p->vecsz, nfo.probsz);
Chris@42 650 nfo.pckdsz = verify_pack(nfo.totalsz, 1);
Chris@42 651 nfo.pckdvecsz = verify_pack(p->vecsz, tensor_sz(nfo.probsz));
Chris@42 652
Chris@42 653 e->i = rimpulse(d, impulse_amp, n, vecn, &nfo,
Chris@42 654 inA, inB, inC, outA, outB, outC, tmp, rounds, tol);
Chris@42 655 e->l = rlinear(N, &nfo, inA, inB, inC, outA, outB, outC, tmp, rounds,tol);
Chris@42 656 e->s = t_shift(n, vecn, &nfo, inA, inB, outA, outB, tmp,
Chris@42 657 rounds, tol, d);
Chris@42 658
Chris@42 659 /* grr, verify-lib.c:preserves_input() only works for complex */
Chris@42 660 if (!p->in_place && !p->destroy_input) {
Chris@42 661 bench_tensor *totalsz_swap, *pckdsz_swap;
Chris@42 662 totalsz_swap = tensor_copy_swapio(nfo.totalsz);
Chris@42 663 pckdsz_swap = tensor_copy_swapio(nfo.pckdsz);
Chris@42 664
Chris@42 665 for (i = 0; i < rounds; ++i) {
Chris@42 666 rarand(inA, N);
Chris@42 667 dofft(&nfo, inA, outB);
Chris@42 668 cpyr((R *) nfo.p->in, totalsz_swap, inB, pckdsz_swap);
Chris@42 669 racmp(inB, inA, N, "preserves_input", 0.0);
Chris@42 670 }
Chris@42 671
Chris@42 672 tensor_destroy(totalsz_swap);
Chris@42 673 tensor_destroy(pckdsz_swap);
Chris@42 674 }
Chris@42 675
Chris@42 676 tensor_destroy(nfo.totalsz);
Chris@42 677 tensor_destroy(nfo.pckdsz);
Chris@42 678 tensor_destroy(nfo.pckdvecsz);
Chris@42 679 bench_free(tmp);
Chris@42 680 bench_free(outC);
Chris@42 681 bench_free(outB);
Chris@42 682 bench_free(outA);
Chris@42 683 bench_free(inC);
Chris@42 684 bench_free(inB);
Chris@42 685 bench_free(inA);
Chris@42 686 bench_free(d);
Chris@42 687 }
Chris@42 688
Chris@42 689
Chris@42 690 typedef struct {
Chris@42 691 dofft_closure k;
Chris@42 692 bench_problem *p;
Chris@42 693 int n0;
Chris@42 694 } dofft_r2r_closure;
Chris@42 695
Chris@42 696 static void cpyr1(int n, R *in, int is, R *out, int os, R scale)
Chris@42 697 {
Chris@42 698 int i;
Chris@42 699 for (i = 0; i < n; ++i)
Chris@42 700 out[i * os] = in[i * is] * scale;
Chris@42 701 }
Chris@42 702
Chris@42 703 static void mke00(C *a, int n, int c)
Chris@42 704 {
Chris@42 705 int i;
Chris@42 706 for (i = 1; i + i < n; ++i)
Chris@42 707 a[n - i][c] = a[i][c];
Chris@42 708 }
Chris@42 709
Chris@42 710 static void mkre00(C *a, int n)
Chris@42 711 {
Chris@42 712 mkreal(a, n);
Chris@42 713 mke00(a, n, 0);
Chris@42 714 }
Chris@42 715
Chris@42 716 static void mkimag(C *a, int n)
Chris@42 717 {
Chris@42 718 int i;
Chris@42 719 for (i = 0; i < n; ++i)
Chris@42 720 c_re(a[i]) = 0.0;
Chris@42 721 }
Chris@42 722
Chris@42 723 static void mko00(C *a, int n, int c)
Chris@42 724 {
Chris@42 725 int i;
Chris@42 726 a[0][c] = 0.0;
Chris@42 727 for (i = 1; i + i < n; ++i)
Chris@42 728 a[n - i][c] = -a[i][c];
Chris@42 729 if (i + i == n)
Chris@42 730 a[i][c] = 0.0;
Chris@42 731 }
Chris@42 732
Chris@42 733 static void mkro00(C *a, int n)
Chris@42 734 {
Chris@42 735 mkreal(a, n);
Chris@42 736 mko00(a, n, 0);
Chris@42 737 }
Chris@42 738
Chris@42 739 static void mkio00(C *a, int n)
Chris@42 740 {
Chris@42 741 mkimag(a, n);
Chris@42 742 mko00(a, n, 1);
Chris@42 743 }
Chris@42 744
Chris@42 745 static void mkre01(C *a, int n) /* n should be be multiple of 4 */
Chris@42 746 {
Chris@42 747 R a0;
Chris@42 748 a0 = c_re(a[0]);
Chris@42 749 mko00(a, n/2, 0);
Chris@42 750 c_re(a[n/2]) = -(c_re(a[0]) = a0);
Chris@42 751 mkre00(a, n);
Chris@42 752 }
Chris@42 753
Chris@42 754 static void mkro01(C *a, int n) /* n should be be multiple of 4 */
Chris@42 755 {
Chris@42 756 c_re(a[0]) = c_im(a[0]) = 0.0;
Chris@42 757 mkre00(a, n/2);
Chris@42 758 mkro00(a, n);
Chris@42 759 }
Chris@42 760
Chris@42 761 static void mkoddonly(C *a, int n)
Chris@42 762 {
Chris@42 763 int i;
Chris@42 764 for (i = 0; i < n; i += 2)
Chris@42 765 c_re(a[i]) = c_im(a[i]) = 0.0;
Chris@42 766 }
Chris@42 767
Chris@42 768 static void mkre10(C *a, int n)
Chris@42 769 {
Chris@42 770 mkoddonly(a, n);
Chris@42 771 mkre00(a, n);
Chris@42 772 }
Chris@42 773
Chris@42 774 static void mkio10(C *a, int n)
Chris@42 775 {
Chris@42 776 mkoddonly(a, n);
Chris@42 777 mkio00(a, n);
Chris@42 778 }
Chris@42 779
Chris@42 780 static void mkre11(C *a, int n)
Chris@42 781 {
Chris@42 782 mkoddonly(a, n);
Chris@42 783 mko00(a, n/2, 0);
Chris@42 784 mkre00(a, n);
Chris@42 785 }
Chris@42 786
Chris@42 787 static void mkro11(C *a, int n)
Chris@42 788 {
Chris@42 789 mkoddonly(a, n);
Chris@42 790 mkre00(a, n/2);
Chris@42 791 mkro00(a, n);
Chris@42 792 }
Chris@42 793
Chris@42 794 static void mkio11(C *a, int n)
Chris@42 795 {
Chris@42 796 mkoddonly(a, n);
Chris@42 797 mke00(a, n/2, 1);
Chris@42 798 mkio00(a, n);
Chris@42 799 }
Chris@42 800
Chris@42 801 static void r2r_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
Chris@42 802 {
Chris@42 803 dofft_r2r_closure *k = (dofft_r2r_closure *)k_;
Chris@42 804 bench_problem *p = k->p;
Chris@42 805 bench_real *ri, *ro;
Chris@42 806 int n, is, os;
Chris@42 807
Chris@42 808 n = p->sz->dims[0].n;
Chris@42 809 is = p->sz->dims[0].is;
Chris@42 810 os = p->sz->dims[0].os;
Chris@42 811
Chris@42 812 ri = (bench_real *) p->in;
Chris@42 813 ro = (bench_real *) p->out;
Chris@42 814
Chris@42 815 switch (p->k[0]) {
Chris@42 816 case R2R_R2HC:
Chris@42 817 cpyr1(n, &c_re(in[0]), 2, ri, is, 1.0);
Chris@42 818 break;
Chris@42 819 case R2R_HC2R:
Chris@42 820 cpyr1(n/2 + 1, &c_re(in[0]), 2, ri, is, 1.0);
Chris@42 821 cpyr1((n+1)/2 - 1, &c_im(in[n-1]), -2, ri + is*(n-1), -is, 1.0);
Chris@42 822 break;
Chris@42 823 case R2R_REDFT00:
Chris@42 824 cpyr1(n, &c_re(in[0]), 2, ri, is, 1.0);
Chris@42 825 break;
Chris@42 826 case R2R_RODFT00:
Chris@42 827 cpyr1(n, &c_re(in[1]), 2, ri, is, 1.0);
Chris@42 828 break;
Chris@42 829 case R2R_REDFT01:
Chris@42 830 cpyr1(n, &c_re(in[0]), 2, ri, is, 1.0);
Chris@42 831 break;
Chris@42 832 case R2R_REDFT10:
Chris@42 833 cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0);
Chris@42 834 break;
Chris@42 835 case R2R_RODFT01:
Chris@42 836 cpyr1(n, &c_re(in[1]), 2, ri, is, 1.0);
Chris@42 837 break;
Chris@42 838 case R2R_RODFT10:
Chris@42 839 cpyr1(n, &c_im(in[1]), 4, ri, is, 1.0);
Chris@42 840 break;
Chris@42 841 case R2R_REDFT11:
Chris@42 842 cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0);
Chris@42 843 break;
Chris@42 844 case R2R_RODFT11:
Chris@42 845 cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0);
Chris@42 846 break;
Chris@42 847 default:
Chris@42 848 BENCH_ASSERT(0); /* not yet implemented */
Chris@42 849 }
Chris@42 850
Chris@42 851 after_problem_rcopy_from(p, ri);
Chris@42 852 doit(1, p);
Chris@42 853 after_problem_rcopy_to(p, ro);
Chris@42 854
Chris@42 855 switch (p->k[0]) {
Chris@42 856 case R2R_R2HC:
Chris@42 857 if (k->k.recopy_input)
Chris@42 858 cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0);
Chris@42 859 cpyr1(n/2 + 1, ro, os, &c_re(out[0]), 2, 1.0);
Chris@42 860 cpyr1((n+1)/2 - 1, ro + os*(n-1), -os, &c_im(out[1]), 2, 1.0);
Chris@42 861 c_im(out[0]) = 0.0;
Chris@42 862 if (n % 2 == 0)
Chris@42 863 c_im(out[n/2]) = 0.0;
Chris@42 864 mkhermitian1(out, n);
Chris@42 865 break;
Chris@42 866 case R2R_HC2R:
Chris@42 867 if (k->k.recopy_input) {
Chris@42 868 cpyr1(n/2 + 1, ri, is, &c_re(in[0]), 2, 1.0);
Chris@42 869 cpyr1((n+1)/2 - 1, ri + is*(n-1), -is, &c_im(in[1]), 2,1.0);
Chris@42 870 }
Chris@42 871 cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0);
Chris@42 872 mkreal(out, n);
Chris@42 873 break;
Chris@42 874 case R2R_REDFT00:
Chris@42 875 if (k->k.recopy_input)
Chris@42 876 cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0);
Chris@42 877 cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0);
Chris@42 878 mkre00(out, k->n0);
Chris@42 879 break;
Chris@42 880 case R2R_RODFT00:
Chris@42 881 if (k->k.recopy_input)
Chris@42 882 cpyr1(n, ri, is, &c_im(in[1]), 2, -1.0);
Chris@42 883 cpyr1(n, ro, os, &c_im(out[1]), 2, -1.0);
Chris@42 884 mkio00(out, k->n0);
Chris@42 885 break;
Chris@42 886 case R2R_REDFT01:
Chris@42 887 if (k->k.recopy_input)
Chris@42 888 cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0);
Chris@42 889 cpyr1(n, ro, os, &c_re(out[1]), 4, 2.0);
Chris@42 890 mkre10(out, k->n0);
Chris@42 891 break;
Chris@42 892 case R2R_REDFT10:
Chris@42 893 if (k->k.recopy_input)
Chris@42 894 cpyr1(n, ri, is, &c_re(in[1]), 4, 2.0);
Chris@42 895 cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0);
Chris@42 896 mkre01(out, k->n0);
Chris@42 897 break;
Chris@42 898 case R2R_RODFT01:
Chris@42 899 if (k->k.recopy_input)
Chris@42 900 cpyr1(n, ri, is, &c_re(in[1]), 2, 1.0);
Chris@42 901 cpyr1(n, ro, os, &c_im(out[1]), 4, -2.0);
Chris@42 902 mkio10(out, k->n0);
Chris@42 903 break;
Chris@42 904 case R2R_RODFT10:
Chris@42 905 if (k->k.recopy_input)
Chris@42 906 cpyr1(n, ri, is, &c_im(in[1]), 4, -2.0);
Chris@42 907 cpyr1(n, ro, os, &c_re(out[1]), 2, 1.0);
Chris@42 908 mkro01(out, k->n0);
Chris@42 909 break;
Chris@42 910 case R2R_REDFT11:
Chris@42 911 if (k->k.recopy_input)
Chris@42 912 cpyr1(n, ri, is, &c_re(in[1]), 4, 2.0);
Chris@42 913 cpyr1(n, ro, os, &c_re(out[1]), 4, 2.0);
Chris@42 914 mkre11(out, k->n0);
Chris@42 915 break;
Chris@42 916 case R2R_RODFT11:
Chris@42 917 if (k->k.recopy_input)
Chris@42 918 cpyr1(n, ri, is, &c_im(in[1]), 4, -2.0);
Chris@42 919 cpyr1(n, ro, os, &c_im(out[1]), 4, -2.0);
Chris@42 920 mkio11(out, k->n0);
Chris@42 921 break;
Chris@42 922 default:
Chris@42 923 BENCH_ASSERT(0); /* not yet implemented */
Chris@42 924 }
Chris@42 925 }
Chris@42 926
Chris@42 927 void accuracy_r2r(bench_problem *p, int rounds, int impulse_rounds,
Chris@42 928 double t[6])
Chris@42 929 {
Chris@42 930 dofft_r2r_closure k;
Chris@42 931 int n, n0 = 1;
Chris@42 932 C *a, *b;
Chris@42 933 aconstrain constrain = 0;
Chris@42 934
Chris@42 935 BENCH_ASSERT(p->kind == PROBLEM_R2R);
Chris@42 936 BENCH_ASSERT(p->sz->rnk == 1);
Chris@42 937 BENCH_ASSERT(p->vecsz->rnk == 0);
Chris@42 938
Chris@42 939 k.k.apply = r2r_apply;
Chris@42 940 k.k.recopy_input = 0;
Chris@42 941 k.p = p;
Chris@42 942 n = tensor_sz(p->sz);
Chris@42 943
Chris@42 944 switch (p->k[0]) {
Chris@42 945 case R2R_R2HC: constrain = mkreal; n0 = n; break;
Chris@42 946 case R2R_HC2R: constrain = mkhermitian1; n0 = n; break;
Chris@42 947 case R2R_REDFT00: constrain = mkre00; n0 = 2*(n-1); break;
Chris@42 948 case R2R_RODFT00: constrain = mkro00; n0 = 2*(n+1); break;
Chris@42 949 case R2R_REDFT01: constrain = mkre01; n0 = 4*n; break;
Chris@42 950 case R2R_REDFT10: constrain = mkre10; n0 = 4*n; break;
Chris@42 951 case R2R_RODFT01: constrain = mkro01; n0 = 4*n; break;
Chris@42 952 case R2R_RODFT10: constrain = mkio10; n0 = 4*n; break;
Chris@42 953 case R2R_REDFT11: constrain = mkre11; n0 = 8*n; break;
Chris@42 954 case R2R_RODFT11: constrain = mkro11; n0 = 8*n; break;
Chris@42 955 default: BENCH_ASSERT(0); /* not yet implemented */
Chris@42 956 }
Chris@42 957 k.n0 = n0;
Chris@42 958
Chris@42 959 a = (C *) bench_malloc(n0 * sizeof(C));
Chris@42 960 b = (C *) bench_malloc(n0 * sizeof(C));
Chris@42 961 accuracy_test(&k.k, constrain, -1, n0, a, b, rounds, impulse_rounds, t);
Chris@42 962 bench_free(b);
Chris@42 963 bench_free(a);
Chris@42 964 }