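/*
 * Mercurial annotate view of fft/fftw/fftw-3.3.4/libbench2/mp.c
 * @ 40:223f770b5341 (kissfft-double, tip)
 *
 * Changeset description: Try a double-precision kissfft
 * author:   Chris Cannam
 * date:     Wed, 07 Sep 2016 10:40:32 +0100
 * parents:  26056e866c29
 * children: (none listed)
 *
 * Columns of the original annotate listing: rev, line, source.
 */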
#include "config.h"
#include "bench.h"
#include <math.h>

/*
 * Multiprecision floating-point numbers in base RADIX = 2^16.
 *
 * A number is stored as sign * RADIX^expt * sum(d[i] * RADIX^(i - LEN)),
 * i.e. d[LEN - 1] is the most significant digit of a fraction in
 * [1/RADIX, 1) and d[0] is the least significant digit.
 */
#define DG unsigned short       /* one base-RADIX digit */
#define ACC unsigned long       /* accumulator for digit arithmetic */
#define REAL bench_real
#define BITS_IN_REAL 53 /* mantissa */

#define SHFT 16
#define RADIX 65536L
#define IRADIX (1.0 / RADIX)
/* low digit of an accumulator value */
#define LO(x) ((x) & (RADIX - 1))
/* carry out of an accumulator value (unsigned) */
#define HI(x) ((x) >> SHFT)
/* borrow/carry of an accumulator value that may have wrapped below zero:
   bias by RADIX/2 * RADIX before shifting, then remove the bias again */
#define HI_SIGNED(x) \
    ((((x) + (ACC)(RADIX >> 1) * RADIX) >> SHFT) - (RADIX >> 1))
/* exponent value that marks the number zero */
#define ZEROEXP (-32768)

/* number of base-RADIX digits kept per number */
#define LEN 10

typedef struct {
    short sign;     /* +1 or -1 */
    short expt;     /* power of RADIX, or ZEROEXP for zero */
    DG d[LEN];      /* digits, least significant first */
} N[1];

/* field shorthands; they rely on the operands being named a, b and c */
#define EXA a->expt
#define EXB b->expt
#define EXC c->expt

#define AD a->d
#define BD b->d

#define SGNA a->sign
#define SGNB b->sign

/* canonical zero: positive sign, ZEROEXP exponent, all digits zero */
static const N zero = {{ 1, ZEROEXP, {0} }};

Chris@19 39 static void cpy(const N a, N b)
Chris@19 40 {
Chris@19 41 *b = *a;
Chris@19 42 }
Chris@19 43
Chris@19 44 static void fromreal(REAL x, N a)
Chris@19 45 {
Chris@19 46 int i, e;
Chris@19 47
Chris@19 48 cpy(zero, a);
Chris@19 49 if (x == 0.0) return;
Chris@19 50
Chris@19 51 if (x >= 0) { SGNA = 1; }
Chris@19 52 else { SGNA = -1; x = -x; }
Chris@19 53
Chris@19 54 e = 0;
Chris@19 55 while (x >= 1.0) { x *= IRADIX; ++e; }
Chris@19 56 while (x < IRADIX) { x *= RADIX; --e; }
Chris@19 57 EXA = e;
Chris@19 58
Chris@19 59 for (i = LEN - 1; i >= 0 && x != 0.0; --i) {
Chris@19 60 REAL y;
Chris@19 61
Chris@19 62 x *= RADIX;
Chris@19 63 y = (REAL) ((int) x);
Chris@19 64 AD[i] = (DG)y;
Chris@19 65 x -= y;
Chris@19 66 }
Chris@19 67 }
Chris@19 68
Chris@19 69 static void fromshort(int x, N a)
Chris@19 70 {
Chris@19 71 cpy(zero, a);
Chris@19 72
Chris@19 73 if (x < 0) { x = -x; SGNA = -1; }
Chris@19 74 else { SGNA = 1; }
Chris@19 75 EXA = 1;
Chris@19 76 AD[LEN - 1] = x;
Chris@19 77 }
Chris@19 78
Chris@19 79 static void pack(DG *d, int e, int s, int l, N a)
Chris@19 80 {
Chris@19 81 int i, j;
Chris@19 82
Chris@19 83 for (i = l - 1; i >= 0; --i, --e)
Chris@19 84 if (d[i] != 0)
Chris@19 85 break;
Chris@19 86
Chris@19 87 if (i < 0) {
Chris@19 88 /* number is zero */
Chris@19 89 cpy(zero, a);
Chris@19 90 } else {
Chris@19 91 EXA = e;
Chris@19 92 SGNA = s;
Chris@19 93
Chris@19 94 if (i >= LEN - 1) {
Chris@19 95 for (j = LEN - 1; j >= 0; --i, --j)
Chris@19 96 AD[j] = d[i];
Chris@19 97 } else {
Chris@19 98 for (j = LEN - 1; i >= 0; --i, --j)
Chris@19 99 AD[j] = d[i];
Chris@19 100 for ( ; j >= 0; --j)
Chris@19 101 AD[j] = 0;
Chris@19 102 }
Chris@19 103 }
Chris@19 104 }
Chris@19 105
Chris@19 106
Chris@19 107 /* compare absolute values */
Chris@19 108 static int abscmp(const N a, const N b)
Chris@19 109 {
Chris@19 110 int i;
Chris@19 111 if (EXA > EXB) return 1;
Chris@19 112 if (EXA < EXB) return -1;
Chris@19 113 for (i = LEN - 1; i >= 0; --i) {
Chris@19 114 if (AD[i] > BD[i])
Chris@19 115 return 1;
Chris@19 116 if (AD[i] < BD[i])
Chris@19 117 return -1;
Chris@19 118 }
Chris@19 119 return 0;
Chris@19 120 }
Chris@19 121
Chris@19 122 static int eq(const N a, const N b)
Chris@19 123 {
Chris@19 124 return (SGNA == SGNB) && (abscmp(a, b) == 0);
Chris@19 125 }
Chris@19 126
Chris@19 127 /* add magnitudes, for |a| >= |b| */
Chris@19 128 static void addmag0(int s, const N a, const N b, N c)
Chris@19 129 {
Chris@19 130 int ia, ib;
Chris@19 131 ACC r = 0;
Chris@19 132 DG d[LEN + 1];
Chris@19 133
Chris@19 134 for (ia = 0, ib = EXA - EXB; ib < LEN; ++ia, ++ib) {
Chris@19 135 r += (ACC)AD[ia] + (ACC)BD[ib];
Chris@19 136 d[ia] = LO(r);
Chris@19 137 r = HI(r);
Chris@19 138 }
Chris@19 139 for (; ia < LEN; ++ia) {
Chris@19 140 r += (ACC)AD[ia];
Chris@19 141 d[ia] = LO(r);
Chris@19 142 r = HI(r);
Chris@19 143 }
Chris@19 144 d[ia] = LO(r);
Chris@19 145 pack(d, EXA + 1, s * SGNA, LEN + 1, c);
Chris@19 146 }
Chris@19 147
Chris@19 148 static void addmag(int s, const N a, const N b, N c)
Chris@19 149 {
Chris@19 150 if (abscmp(a, b) > 0) addmag0(1, a, b, c); else addmag0(s, b, a, c);
Chris@19 151 }
Chris@19 152
Chris@19 153 /* subtract magnitudes, for |a| >= |b| */
Chris@19 154 static void submag0(int s, const N a, const N b, N c)
Chris@19 155 {
Chris@19 156 int ia, ib;
Chris@19 157 ACC r = 0;
Chris@19 158 DG d[LEN];
Chris@19 159
Chris@19 160 for (ia = 0, ib = EXA - EXB; ib < LEN; ++ia, ++ib) {
Chris@19 161 r += (ACC)AD[ia] - (ACC)BD[ib];
Chris@19 162 d[ia] = LO(r);
Chris@19 163 r = HI_SIGNED(r);
Chris@19 164 }
Chris@19 165 for (; ia < LEN; ++ia) {
Chris@19 166 r += (ACC)AD[ia];
Chris@19 167 d[ia] = LO(r);
Chris@19 168 r = HI_SIGNED(r);
Chris@19 169 }
Chris@19 170
Chris@19 171 pack(d, EXA, s * SGNA, LEN, c);
Chris@19 172 }
Chris@19 173
Chris@19 174 static void submag(int s, const N a, const N b, N c)
Chris@19 175 {
Chris@19 176 if (abscmp(a, b) > 0) submag0(1, a, b, c); else submag0(s, b, a, c);
Chris@19 177 }
Chris@19 178
Chris@19 179 /* c = a + b */
Chris@19 180 static void add(const N a, const N b, N c)
Chris@19 181 {
Chris@19 182 if (SGNA == SGNB) addmag(1, a, b, c); else submag(1, a, b, c);
Chris@19 183 }
Chris@19 184
Chris@19 185 static void sub(const N a, const N b, N c)
Chris@19 186 {
Chris@19 187 if (SGNA == SGNB) submag(-1, a, b, c); else addmag(-1, a, b, c);
Chris@19 188 }
Chris@19 189
Chris@19 190 static void mul(const N a, const N b, N c)
Chris@19 191 {
Chris@19 192 DG d[2 * LEN];
Chris@19 193 int i, j, k;
Chris@19 194 ACC r;
Chris@19 195
Chris@19 196 for (i = 0; i < LEN; ++i)
Chris@19 197 d[2 * i] = d[2 * i + 1] = 0;
Chris@19 198
Chris@19 199 for (i = 0; i < LEN; ++i) {
Chris@19 200 ACC ai = AD[i];
Chris@19 201 if (ai) {
Chris@19 202 r = 0;
Chris@19 203 for (j = 0, k = i; j < LEN; ++j, ++k) {
Chris@19 204 r += ai * (ACC)BD[j] + (ACC)d[k];
Chris@19 205 d[k] = LO(r);
Chris@19 206 r = HI(r);
Chris@19 207 }
Chris@19 208 d[k] = LO(r);
Chris@19 209 }
Chris@19 210 }
Chris@19 211
Chris@19 212 pack(d, EXA + EXB, SGNA * SGNB, 2 * LEN, c);
Chris@19 213 }
Chris@19 214
Chris@19 215 static REAL toreal(const N a)
Chris@19 216 {
Chris@19 217 REAL h, l, f;
Chris@19 218 int i, bits;
Chris@19 219 ACC r;
Chris@19 220 DG sticky;
Chris@19 221
Chris@19 222 if (EXA != ZEROEXP) {
Chris@19 223 f = IRADIX;
Chris@19 224 i = LEN;
Chris@19 225
Chris@19 226 bits = 0;
Chris@19 227 h = (r = AD[--i]) * f; f *= IRADIX;
Chris@19 228 for (bits = 0; r > 0; ++bits)
Chris@19 229 r >>= 1;
Chris@19 230
Chris@19 231 /* first digit */
Chris@19 232 while (bits + SHFT <= BITS_IN_REAL) {
Chris@19 233 h += AD[--i] * f; f *= IRADIX; bits += SHFT;
Chris@19 234 }
Chris@19 235
Chris@19 236 /* guard digit (leave one bit for sticky bit, hence `<' instead
Chris@19 237 of `<=') */
Chris@19 238 bits = 0; l = 0.0;
Chris@19 239 while (bits + SHFT < BITS_IN_REAL) {
Chris@19 240 l += AD[--i] * f; f *= IRADIX; bits += SHFT;
Chris@19 241 }
Chris@19 242
Chris@19 243 /* sticky bit */
Chris@19 244 sticky = 0;
Chris@19 245 while (i > 0)
Chris@19 246 sticky |= AD[--i];
Chris@19 247
Chris@19 248 if (sticky)
Chris@19 249 l += (RADIX / 2) * f;
Chris@19 250
Chris@19 251 h += l;
Chris@19 252
Chris@19 253 for (i = 0; i < EXA; ++i) h *= (REAL)RADIX;
Chris@19 254 for (i = 0; i > EXA; --i) h *= IRADIX;
Chris@19 255 if (SGNA == -1) h = -h;
Chris@19 256 return h;
Chris@19 257 } else {
Chris@19 258 return 0.0;
Chris@19 259 }
Chris@19 260 }
Chris@19 261
Chris@19 262 static void neg(N a)
Chris@19 263 {
Chris@19 264 SGNA = -SGNA;
Chris@19 265 }
Chris@19 266
Chris@19 267 static void inv(const N a, N x)
Chris@19 268 {
Chris@19 269 N w, z, one, two;
Chris@19 270
Chris@19 271 fromreal(1.0 / toreal(a), x); /* initial guess */
Chris@19 272 fromshort(1, one);
Chris@19 273 fromshort(2, two);
Chris@19 274
Chris@19 275 for (;;) {
Chris@19 276 /* Newton */
Chris@19 277 mul(a, x, w);
Chris@19 278 sub(two, w, z);
Chris@19 279 if (eq(one, z)) break;
Chris@19 280 mul(x, z, x);
Chris@19 281 }
Chris@19 282 }
Chris@19 283
Chris@19 284
Chris@19 285 /* 2 pi */
Chris@19 286 static const N n2pi = {{
Chris@19 287 1, 1,
Chris@19 288 {18450, 59017, 1760, 5212, 9779, 4518, 2886, 54545, 18558, 6}
Chris@19 289 }};
Chris@19 290
Chris@19 291 /* 1 / 31! */
Chris@19 292 static const N i31fac = {{
Chris@19 293 1, -7,
Chris@19 294 {28087, 45433, 51357, 24545, 14291, 3954, 57879, 8109, 38716, 41382}
Chris@19 295 }};
Chris@19 296
Chris@19 297
Chris@19 298 /* 1 / 32! */
Chris@19 299 static const N i32fac = {{
Chris@19 300 1, -7,
Chris@19 301 {52078, 60811, 3652, 39679, 37310, 47227, 28432, 57597, 13497, 1293}
Chris@19 302 }};
Chris@19 303
Chris@19 304 static void msin(const N a, N b)
Chris@19 305 {
Chris@19 306 N a2, g, k;
Chris@19 307 int i;
Chris@19 308
Chris@19 309 cpy(i31fac, g);
Chris@19 310 cpy(g, b);
Chris@19 311 mul(a, a, a2);
Chris@19 312
Chris@19 313 /* Taylor */
Chris@19 314 for (i = 31; i > 1; i -= 2) {
Chris@19 315 fromshort(i * (i - 1), k);
Chris@19 316 mul(k, g, g);
Chris@19 317 mul(a2, b, k);
Chris@19 318 sub(g, k, b);
Chris@19 319 }
Chris@19 320 mul(a, b, b);
Chris@19 321 }
Chris@19 322
Chris@19 323 static void mcos(const N a, N b)
Chris@19 324 {
Chris@19 325 N a2, g, k;
Chris@19 326 int i;
Chris@19 327
Chris@19 328 cpy(i32fac, g);
Chris@19 329 cpy(g, b);
Chris@19 330 mul(a, a, a2);
Chris@19 331
Chris@19 332 /* Taylor */
Chris@19 333 for (i = 32; i > 0; i -= 2) {
Chris@19 334 fromshort(i * (i - 1), k);
Chris@19 335 mul(k, g, g);
Chris@19 336 mul(a2, b, k);
Chris@19 337 sub(g, k, b);
Chris@19 338 }
Chris@19 339 }
Chris@19 340
Chris@19 341 static void by2pi(REAL m, REAL n, N a)
Chris@19 342 {
Chris@19 343 N b;
Chris@19 344
Chris@19 345 fromreal(n, b);
Chris@19 346 inv(b, a);
Chris@19 347 fromreal(m, b);
Chris@19 348 mul(a, b, a);
Chris@19 349 mul(n2pi, a, a);
Chris@19 350 }
Chris@19 351
Chris@19 352 static void sin2pi(REAL m, REAL n, N a);
Chris@19 353 static void cos2pi(REAL m, REAL n, N a)
Chris@19 354 {
Chris@19 355 N b;
Chris@19 356 if (m < 0) cos2pi(-m, n, a);
Chris@19 357 else if (m > n * 0.5) cos2pi(n - m, n, a);
Chris@19 358 else if (m > n * 0.25) {sin2pi(m - n * 0.25, n, a); neg(a);}
Chris@19 359 else if (m > n * 0.125) sin2pi(n * 0.25 - m, n, a);
Chris@19 360 else { by2pi(m, n, b); mcos(b, a); }
Chris@19 361 }
Chris@19 362
Chris@19 363 static void sin2pi(REAL m, REAL n, N a)
Chris@19 364 {
Chris@19 365 N b;
Chris@19 366 if (m < 0) {sin2pi(-m, n, a); neg(a);}
Chris@19 367 else if (m > n * 0.5) {sin2pi(n - m, n, a); neg(a);}
Chris@19 368 else if (m > n * 0.25) {cos2pi(m - n * 0.25, n, a);}
Chris@19 369 else if (m > n * 0.125) {cos2pi(n * 0.25 - m, n, a);}
Chris@19 370 else {by2pi(m, n, b); msin(b, a);}
Chris@19 371 }
Chris@19 372
Chris@19 373 /*----------------------------------------------------------------------*/
Chris@19 374 /* FFT stuff */
Chris@19 375
Chris@19 376 /* (r0 + i i0)(r1 + i i1) */
Chris@19 377 static void cmul(N r0, N i0, N r1, N i1, N r2, N i2)
Chris@19 378 {
Chris@19 379 N s, t, q;
Chris@19 380 mul(r0, r1, s);
Chris@19 381 mul(i0, i1, t);
Chris@19 382 sub(s, t, q);
Chris@19 383 mul(r0, i1, s);
Chris@19 384 mul(i0, r1, t);
Chris@19 385 add(s, t, i2);
Chris@19 386 cpy(q, r2);
Chris@19 387 }
Chris@19 388
Chris@19 389 /* (r0 - i i0)(r1 + i i1) */
Chris@19 390 static void cmulj(N r0, N i0, N r1, N i1, N r2, N i2)
Chris@19 391 {
Chris@19 392 N s, t, q;
Chris@19 393 mul(r0, r1, s);
Chris@19 394 mul(i0, i1, t);
Chris@19 395 add(s, t, q);
Chris@19 396 mul(r0, i1, s);
Chris@19 397 mul(i0, r1, t);
Chris@19 398 sub(s, t, i2);
Chris@19 399 cpy(q, r2);
Chris@19 400 }
Chris@19 401
Chris@19 402 static void mcexp(int m, int n, N r, N i)
Chris@19 403 {
Chris@19 404 static int cached_n = -1;
Chris@19 405 static N w[64][2];
Chris@19 406 int k, j;
Chris@19 407 if (n != cached_n) {
Chris@19 408 for (j = 1, k = 0; j < n; j += j, ++k) {
Chris@19 409 cos2pi(j, n, w[k][0]);
Chris@19 410 sin2pi(j, n, w[k][1]);
Chris@19 411 }
Chris@19 412 cached_n = n;
Chris@19 413 }
Chris@19 414
Chris@19 415 fromshort(1, r);
Chris@19 416 fromshort(0, i);
Chris@19 417 if (m > 0) {
Chris@19 418 for (k = 0; m; ++k, m >>= 1)
Chris@19 419 if (m & 1)
Chris@19 420 cmul(w[k][0], w[k][1], r, i, r, i);
Chris@19 421 } else {
Chris@19 422 m = -m;
Chris@19 423 for (k = 0; m; ++k, m >>= 1)
Chris@19 424 if (m & 1)
Chris@19 425 cmulj(w[k][0], w[k][1], r, i, r, i);
Chris@19 426 }
Chris@19 427 }
Chris@19 428
Chris@19 429 static void bitrev(int n, N *a)
Chris@19 430 {
Chris@19 431 int i, j, m;
Chris@19 432 for (i = j = 0; i < n - 1; ++i) {
Chris@19 433 if (i < j) {
Chris@19 434 N t;
Chris@19 435 cpy(a[2*i], t); cpy(a[2*j], a[2*i]); cpy(t, a[2*j]);
Chris@19 436 cpy(a[2*i+1], t); cpy(a[2*j+1], a[2*i+1]); cpy(t, a[2*j+1]);
Chris@19 437 }
Chris@19 438
Chris@19 439 /* bit reversed counter */
Chris@19 440 m = n; do { m >>= 1; j ^= m; } while (!(j & m));
Chris@19 441 }
Chris@19 442 }
Chris@19 443
Chris@19 444 static void fft0(int n, N *a, int sign)
Chris@19 445 {
Chris@19 446 int i, j, k;
Chris@19 447
Chris@19 448 bitrev(n, a);
Chris@19 449 for (i = 1; i < n; i = 2 * i) {
Chris@19 450 for (j = 0; j < i; ++j) {
Chris@19 451 N wr, wi;
Chris@19 452 mcexp(sign * (int)j, 2 * i, wr, wi);
Chris@19 453 for (k = j; k < n; k += 2 * i) {
Chris@19 454 N *a0 = a + 2 * k;
Chris@19 455 N *a1 = a0 + 2 * i;
Chris@19 456 N r0, i0, r1, i1, t0, t1, xr, xi;
Chris@19 457 cpy(a0[0], r0); cpy(a0[1], i0);
Chris@19 458 cpy(a1[0], r1); cpy(a1[1], i1);
Chris@19 459 mul(r1, wr, t0); mul(i1, wi, t1); sub(t0, t1, xr);
Chris@19 460 mul(r1, wi, t0); mul(i1, wr, t1); add(t0, t1, xi);
Chris@19 461 add(r0, xr, a0[0]); add(i0, xi, a0[1]);
Chris@19 462 sub(r0, xr, a1[0]); sub(i0, xi, a1[1]);
Chris@19 463 }
Chris@19 464 }
Chris@19 465 }
Chris@19 466 }
Chris@19 467
Chris@19 468 /* a[2*k]+i*a[2*k+1] = exp(2*pi*i*k^2/(2*n)) */
Chris@19 469 static void bluestein_sequence(int n, N *a)
Chris@19 470 {
Chris@19 471 int k, ksq, n2 = 2 * n;
Chris@19 472
Chris@19 473 ksq = 1; /* (-1)^2 */
Chris@19 474 for (k = 0; k < n; ++k) {
Chris@19 475 /* careful with overflow */
Chris@19 476 ksq = ksq + 2*k - 1; while (ksq > n2) ksq -= n2;
Chris@19 477 mcexp(ksq, n2, a[2*k], a[2*k+1]);
Chris@19 478 }
Chris@19 479 }
Chris@19 480
/*
 * Return the smallest power of two that is >= x (1 when x <= 1).
 *
 * NOTE(review): h is doubled in int arithmetic, so x larger than the
 * biggest power of two representable in int would overflow.
 */
static int pow2_atleast(int x)
{
    int h;
    for (h = 1; h < x; h = 2 * h)
        ;
    return h;
}

Chris@19 489 static N *cached_bluestein_w = 0;
Chris@19 490 static N *cached_bluestein_y = 0;
Chris@19 491 static int cached_bluestein_n = -1;
Chris@19 492
Chris@19 493 static void bluestein(int n, N *a)
Chris@19 494 {
Chris@19 495 int nb = pow2_atleast(2 * n);
Chris@19 496 N *b = (N *)bench_malloc(2 * nb * sizeof(N));
Chris@19 497 N *w = cached_bluestein_w;
Chris@19 498 N *y = cached_bluestein_y;
Chris@19 499 N nbinv;
Chris@19 500 int i;
Chris@19 501
Chris@19 502 fromreal(1.0 / nb, nbinv); /* exact because nb = 2^k */
Chris@19 503
Chris@19 504 if (cached_bluestein_n != n) {
Chris@19 505 if (w) bench_free(w);
Chris@19 506 if (y) bench_free(y);
Chris@19 507 w = (N *)bench_malloc(2 * n * sizeof(N));
Chris@19 508 y = (N *)bench_malloc(2 * nb * sizeof(N));
Chris@19 509 cached_bluestein_n = n;
Chris@19 510 cached_bluestein_w = w;
Chris@19 511 cached_bluestein_y = y;
Chris@19 512
Chris@19 513 bluestein_sequence(n, w);
Chris@19 514 for (i = 0; i < 2*nb; ++i) cpy(zero, y[i]);
Chris@19 515
Chris@19 516 for (i = 0; i < n; ++i) {
Chris@19 517 cpy(w[2*i], y[2*i]);
Chris@19 518 cpy(w[2*i+1], y[2*i+1]);
Chris@19 519 }
Chris@19 520 for (i = 1; i < n; ++i) {
Chris@19 521 cpy(w[2*i], y[2*(nb-i)]);
Chris@19 522 cpy(w[2*i+1], y[2*(nb-i)+1]);
Chris@19 523 }
Chris@19 524
Chris@19 525 fft0(nb, y, -1);
Chris@19 526 }
Chris@19 527
Chris@19 528 for (i = 0; i < 2*nb; ++i) cpy(zero, b[i]);
Chris@19 529
Chris@19 530 for (i = 0; i < n; ++i)
Chris@19 531 cmulj(w[2*i], w[2*i+1], a[2*i], a[2*i+1], b[2*i], b[2*i+1]);
Chris@19 532
Chris@19 533 /* scaled convolution b * y */
Chris@19 534 fft0(nb, b, -1);
Chris@19 535
Chris@19 536 for (i = 0; i < nb; ++i)
Chris@19 537 cmul(b[2*i], b[2*i+1], y[2*i], y[2*i+1], b[2*i], b[2*i+1]);
Chris@19 538 fft0(nb, b, 1);
Chris@19 539
Chris@19 540 for (i = 0; i < n; ++i) {
Chris@19 541 cmulj(w[2*i], w[2*i+1], b[2*i], b[2*i+1], a[2*i], a[2*i+1]);
Chris@19 542 mul(nbinv, a[2*i], a[2*i]);
Chris@19 543 mul(nbinv, a[2*i+1], a[2*i+1]);
Chris@19 544 }
Chris@19 545
Chris@19 546 bench_free(b);
Chris@19 547 }
Chris@19 548
Chris@19 549 static void swapri(int n, N *a)
Chris@19 550 {
Chris@19 551 int i;
Chris@19 552 for (i = 0; i < n; ++i) {
Chris@19 553 N t;
Chris@19 554 cpy(a[2 * i], t);
Chris@19 555 cpy(a[2 * i + 1], a[2 * i]);
Chris@19 556 cpy(t, a[2 * i + 1]);
Chris@19 557 }
Chris@19 558 }
Chris@19 559
Chris@19 560 static void fft1(int n, N *a, int sign)
Chris@19 561 {
Chris@19 562 if (power_of_two(n)) {
Chris@19 563 fft0(n, a, sign);
Chris@19 564 } else {
Chris@19 565 if (sign == 1) swapri(n, a);
Chris@19 566 bluestein(n, a);
Chris@19 567 if (sign == 1) swapri(n, a);
Chris@19 568 }
Chris@19 569 }
Chris@19 570
Chris@19 571 static void fromrealv(int n, bench_complex *a, N *b)
Chris@19 572 {
Chris@19 573 int i;
Chris@19 574
Chris@19 575 for (i = 0; i < n; ++i) {
Chris@19 576 fromreal(c_re(a[i]), b[2 * i]);
Chris@19 577 fromreal(c_im(a[i]), b[2 * i + 1]);
Chris@19 578 }
Chris@19 579 }
Chris@19 580
Chris@19 581 static void compare(int n, N *a, N *b, double *err)
Chris@19 582 {
Chris@19 583 int i;
Chris@19 584 double e1, e2, einf;
Chris@19 585 double n1, n2, ninf;
Chris@19 586
Chris@19 587 e1 = e2 = einf = 0.0;
Chris@19 588 n1 = n2 = ninf = 0.0;
Chris@19 589
Chris@19 590 # define DO(x1, x2, xinf, var) { \
Chris@19 591 double d = var; \
Chris@19 592 if (d < 0) d = -d; \
Chris@19 593 x1 += d; x2 += d * d; if (d > xinf) xinf = d; \
Chris@19 594 }
Chris@19 595
Chris@19 596 for (i = 0; i < 2 * n; ++i) {
Chris@19 597 N dd;
Chris@19 598 sub(a[i], b[i], dd);
Chris@19 599 DO(n1, n2, ninf, toreal(a[i]));
Chris@19 600 DO(e1, e2, einf, toreal(dd));
Chris@19 601 }
Chris@19 602
Chris@19 603 # undef DO
Chris@19 604 err[0] = e1 / n1;
Chris@19 605 err[1] = sqrt(e2 / n2);
Chris@19 606 err[2] = einf / ninf;
Chris@19 607 }
Chris@19 608
Chris@19 609 void fftaccuracy(int n, bench_complex *a, bench_complex *ffta,
Chris@19 610 int sign, double err[6])
Chris@19 611 {
Chris@19 612 N *b = (N *)bench_malloc(2 * n * sizeof(N));
Chris@19 613 N *fftb = (N *)bench_malloc(2 * n * sizeof(N));
Chris@19 614 N mn, ninv;
Chris@19 615 int i;
Chris@19 616
Chris@19 617 fromreal(n, mn); inv(mn, ninv);
Chris@19 618
Chris@19 619 /* forward error */
Chris@19 620 fromrealv(n, a, b); fromrealv(n, ffta, fftb);
Chris@19 621 fft1(n, b, sign);
Chris@19 622 compare(n, b, fftb, err);
Chris@19 623
Chris@19 624 /* backward error */
Chris@19 625 fromrealv(n, a, b); fromrealv(n, ffta, fftb);
Chris@19 626 for (i = 0; i < 2 * n; ++i) mul(fftb[i], ninv, fftb[i]);
Chris@19 627 fft1(n, fftb, -sign);
Chris@19 628 compare(n, b, fftb, err + 3);
Chris@19 629
Chris@19 630 bench_free(fftb);
Chris@19 631 bench_free(b);
Chris@19 632 }
Chris@19 633
Chris@19 634 void fftaccuracy_done(void)
Chris@19 635 {
Chris@19 636 if (cached_bluestein_w) bench_free(cached_bluestein_w);
Chris@19 637 if (cached_bluestein_y) bench_free(cached_bluestein_y);
Chris@19 638 cached_bluestein_w = 0;
Chris@19 639 cached_bluestein_y = 0;
Chris@19 640 cached_bluestein_n = -1;
Chris@19 641 }