annotate fft/nayukic/fft.c @ 40:223f770b5341 kissfft-double tip

Try a double-precision kissfft
author Chris Cannam
date Wed, 07 Sep 2016 10:40:32 +0100
parents bbf5d4e825eb
children
rev   line source
Chris@32 1 /*
Chris@32 2 * Free FFT and convolution (C)
Chris@32 3 *
Chris@32 4 * Copyright (c) 2014 Project Nayuki
Chris@32 5 * http://www.nayuki.io/page/free-small-fft-in-multiple-languages
Chris@32 6 *
Chris@32 7 * (MIT License)
Chris@32 8 * Permission is hereby granted, free of charge, to any person obtaining a copy of
Chris@32 9 * this software and associated documentation files (the "Software"), to deal in
Chris@32 10 * the Software without restriction, including without limitation the rights to
Chris@32 11 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
Chris@32 12 * the Software, and to permit persons to whom the Software is furnished to do so,
Chris@32 13 * subject to the following conditions:
Chris@32 14 * - The above copyright notice and this permission notice shall be included in
Chris@32 15 * all copies or substantial portions of the Software.
Chris@32 16 * - The Software is provided "as is", without warranty of any kind, express or
Chris@32 17 * implied, including but not limited to the warranties of merchantability,
Chris@32 18 * fitness for a particular purpose and noninfringement. In no event shall the
Chris@32 19 * authors or copyright holders be liable for any claim, damages or other
Chris@32 20 * liability, whether in an action of contract, tort or otherwise, arising from,
Chris@32 21 * out of or in connection with the Software or the use or other dealings in the
Chris@32 22 * Software.
Chris@32 23 */
Chris@32 24
Chris@32 25 #include <math.h>
Chris@32 26 #include <stdlib.h>
Chris@32 27 #include <string.h>
Chris@32 28 #include <stdio.h>
Chris@32 29 #include "fft.h"
Chris@32 30
Chris@32 31
// Private function prototypes
static size_t reverse_bits(size_t x, unsigned int n);
static void *memdup(const void *src, size_t n);

// SIZE_MAX is normally supplied by <stdint.h>. Define the fallback only when
// it is missing, so the macro is never redefined with a different replacement
// list if <stdint.h> has already been included (directly or transitively).
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)-1)
#endif
Chris@32 37
Chris@32 38
/*
 * Computes the forward transform of the complex vector (real[], imag[]) of
 * length n in place, choosing the algorithm from the length:
 *   - n == 0        : nothing to do, reported as success
 *   - n a power of 2: transform_radix2()
 *   - any other n   : transform_bluestein()
 * Returns 1 on success and 0 on failure, as reported by the chosen routine.
 */
int transform(double real[], double imag[], size_t n) {
	if (n == 0)
		return 1;

	// n & (n - 1) clears the lowest set bit, so it is zero only when n has
	// exactly one bit set, i.e. when n is a power of 2.
	const int is_power_of_two = ((n & (n - 1)) == 0);

	return is_power_of_two
		? transform_radix2(real, imag, n)
		: transform_bluestein(real, imag, n);
}
Chris@32 47
Chris@32 48
/*
 * Computes the inverse transform of (real[], imag[]) in place by running the
 * forward transform with the real and imaginary arrays swapped.
 * No scaling is applied here; callers that need it divide by n themselves.
 * Returns the status of transform(): 1 on success, 0 on failure.
 */
int inverse_transform(double real[], double imag[], size_t n) {
	const int status = transform(imag, real, n);
	return status;
}
Chris@32 52
Chris@32 53 tables *precalc(size_t n) {
Chris@32 54 unsigned int levels;
Chris@32 55 // Compute levels = floor(log2(n))
Chris@32 56 {
Chris@32 57 size_t temp = n;
Chris@32 58 levels = 0;
Chris@32 59 while (temp > 1) {
Chris@32 60 levels++;
Chris@32 61 temp >>= 1;
Chris@32 62 }
Chris@32 63 if (1u << levels != n)
Chris@32 64 return 0; // n is not a power of 2
Chris@32 65 }
Chris@32 66 if (SIZE_MAX / sizeof(double) < n / 2) return 0;
Chris@32 67 tables *tables = malloc(sizeof(tables));
Chris@32 68 if (!tables) return tables;
Chris@32 69 tables->levels = levels;
Chris@32 70 size_t size = (n / 2) * sizeof(double);
Chris@32 71 tables->cos = malloc(size);
Chris@32 72 if (!tables->cos) {
Chris@32 73 free(tables);
Chris@32 74 return 0;
Chris@32 75 }
Chris@32 76 tables->sin = malloc(size);
Chris@32 77 if (!tables->sin) {
Chris@32 78 free(tables->cos);
Chris@32 79 free(tables);
Chris@32 80 return 0;
Chris@32 81 }
Chris@32 82 int i;
Chris@32 83 for (i = 0; i < n / 2; i++) {
Chris@32 84 tables->cos[i] = cos(2 * M_PI * i / n);
Chris@32 85 tables->sin[i] = sin(2 * M_PI * i / n);
Chris@32 86 }
Chris@32 87 return tables;
Chris@32 88 }
Chris@32 89
Chris@33 90 tables_f *precalc_f(size_t n) {
Chris@33 91 unsigned int levels;
Chris@33 92 // Compute levels = floor(log2(n))
Chris@33 93 {
Chris@33 94 size_t temp = n;
Chris@33 95 levels = 0;
Chris@33 96 while (temp > 1) {
Chris@33 97 levels++;
Chris@33 98 temp >>= 1;
Chris@33 99 }
Chris@33 100 if (1u << levels != n)
Chris@33 101 return 0; // n is not a power of 2
Chris@33 102 }
Chris@33 103 if (SIZE_MAX / sizeof(float) < n / 2) return 0;
Chris@33 104 tables_f *tables = malloc(sizeof(tables_f));
Chris@33 105 if (!tables) return tables;
Chris@33 106 tables->levels = levels;
Chris@33 107 size_t size = (n / 2) * sizeof(float);
Chris@33 108 tables->cos = malloc(size);
Chris@33 109 if (!tables->cos) {
Chris@33 110 free(tables);
Chris@33 111 return 0;
Chris@33 112 }
Chris@33 113 tables->sin = malloc(size);
Chris@33 114 if (!tables->sin) {
Chris@33 115 free(tables->cos);
Chris@33 116 free(tables);
Chris@33 117 return 0;
Chris@33 118 }
Chris@33 119 int i;
Chris@33 120 for (i = 0; i < n / 2; i++) {
Chris@33 121 tables->cos[i] = cos(2 * M_PI * i / n);
Chris@33 122 tables->sin[i] = sin(2 * M_PI * i / n);
Chris@33 123 }
Chris@33 124 return tables;
Chris@33 125 }
Chris@33 126
Chris@32 127 void dispose(tables *tables) {
Chris@32 128 if (!tables) return;
Chris@32 129 free(tables->cos);
Chris@32 130 free(tables->sin);
Chris@32 131 free(tables);
Chris@32 132 }
Chris@32 133
Chris@33 134 void dispose_f(tables_f *tables) {
Chris@33 135 if (!tables) return;
Chris@33 136 free(tables->cos);
Chris@33 137 free(tables->sin);
Chris@33 138 free(tables);
Chris@33 139 }
Chris@33 140
Chris@32 141 void transform_radix2_precalc(double real[], double imag[], int n, tables *tables) {
Chris@32 142 double *cos_table, *sin_table;
Chris@32 143 int size;
Chris@32 144 int i;
Chris@32 145
Chris@32 146 // Trignometric tables
Chris@32 147 cos_table = tables->cos;
Chris@32 148 sin_table = tables->sin;
Chris@32 149
Chris@32 150 // Bit-reversed addressing permutation
Chris@32 151 for (i = 0; i < n; i++) {
Chris@32 152 int j = reverse_bits(i, tables->levels);
Chris@32 153 if (j > i) {
Chris@32 154 double temp = real[i];
Chris@32 155 real[i] = real[j];
Chris@32 156 real[j] = temp;
Chris@32 157 temp = imag[i];
Chris@32 158 imag[i] = imag[j];
Chris@32 159 imag[j] = temp;
Chris@32 160 }
Chris@32 161 }
Chris@32 162
Chris@32 163 // Cooley-Tukey decimation-in-time radix-2 FFT
Chris@32 164 for (size = 2; size <= n; size *= 2) {
Chris@32 165 int halfsize = size / 2;
Chris@32 166 int tablestep = n / size;
Chris@32 167 for (i = 0; i < n; i += size) {
Chris@32 168 int j;
Chris@32 169 int k;
Chris@32 170 for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
Chris@32 171 double tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k];
Chris@32 172 double tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k];
Chris@32 173 real[j + halfsize] = real[j] - tpre;
Chris@32 174 imag[j + halfsize] = imag[j] - tpim;
Chris@32 175 real[j] += tpre;
Chris@32 176 imag[j] += tpim;
Chris@32 177 }
Chris@32 178 }
Chris@32 179 if (size == n) // Prevent overflow in 'size *= 2'
Chris@32 180 break;
Chris@32 181 }
Chris@32 182 }
Chris@32 183
Chris@33 184 void transform_radix2_precalc_f(float real[], float imag[], int n, tables_f *tables) {
Chris@33 185 float *cos_table, *sin_table;
Chris@33 186 int size;
Chris@33 187 int i;
Chris@33 188
Chris@33 189 // Trignometric tables
Chris@33 190 cos_table = tables->cos;
Chris@33 191 sin_table = tables->sin;
Chris@33 192
Chris@33 193 // Bit-reversed addressing permutation
Chris@33 194 for (i = 0; i < n; i++) {
Chris@33 195 int j = reverse_bits(i, tables->levels);
Chris@33 196 if (j > i) {
Chris@33 197 float temp = real[i];
Chris@33 198 real[i] = real[j];
Chris@33 199 real[j] = temp;
Chris@33 200 temp = imag[i];
Chris@33 201 imag[i] = imag[j];
Chris@33 202 imag[j] = temp;
Chris@33 203 }
Chris@33 204 }
Chris@33 205
Chris@33 206 // Cooley-Tukey decimation-in-time radix-2 FFT
Chris@33 207 for (size = 2; size <= n; size *= 2) {
Chris@33 208 int halfsize = size / 2;
Chris@33 209 int tablestep = n / size;
Chris@33 210 for (i = 0; i < n; i += size) {
Chris@33 211 int j;
Chris@33 212 int k;
Chris@33 213 for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
Chris@33 214 float tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k];
Chris@33 215 float tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k];
Chris@33 216 real[j + halfsize] = real[j] - tpre;
Chris@33 217 imag[j + halfsize] = imag[j] - tpim;
Chris@33 218 real[j] += tpre;
Chris@33 219 imag[j] += tpim;
Chris@33 220 }
Chris@33 221 }
Chris@33 222 if (size == n) // Prevent overflow in 'size *= 2'
Chris@33 223 break;
Chris@33 224 }
Chris@33 225 }
Chris@33 226
/*
 * In-place radix-2 transform of the complex vector (real[], imag[]) of length
 * n, building its own trigonometric tables on every call.
 *
 * n must be a power of 2. Returns 1 on success; returns 0 if n is not a power
 * of 2 (including n == 0), if the table size would overflow size_t, or if an
 * allocation fails. The input is left untouched on failure.
 */
int transform_radix2(double real[], double imag[], size_t n) {
	// Variables
	int status = 0;
	unsigned int levels = 0;
	double *cos_table = NULL;
	double *sin_table = NULL;
	size_t half;
	size_t size;
	size_t i;

	// Compute levels = floor(log2(n))
	for (size_t temp = n; temp > 1; temp >>= 1)
		levels++;
	// Shift a size_t rather than an unsigned int: levels can reach the bit
	// width of unsigned int when size_t is wider, and such a shift is undefined.
	if (((size_t)1 << levels) != n)
		return 0;  // n is not a power of 2

	// A length-1 transform is the identity. Return before allocating, because
	// the tables would be zero bytes long and malloc(0) may legitimately
	// return NULL, which would otherwise be reported as a failure.
	if (n == 1)
		return 1;

	// Trigonometric tables
	half = n / 2;
	if (SIZE_MAX / sizeof(double) < half)
		return 0;
	cos_table = malloc(half * sizeof(double));
	sin_table = malloc(half * sizeof(double));
	if (cos_table == NULL || sin_table == NULL)
		goto cleanup;
	for (i = 0; i < half; i++) {
		cos_table[i] = cos(2 * M_PI * i / n);
		sin_table[i] = sin(2 * M_PI * i / n);
	}

	// Bit-reversed addressing permutation
	for (i = 0; i < n; i++) {
		size_t j = reverse_bits(i, levels);
		if (j > i) {  // swap each pair only once
			double temp = real[i];
			real[i] = real[j];
			real[j] = temp;
			temp = imag[i];
			imag[i] = imag[j];
			imag[j] = temp;
		}
	}

	// Cooley-Tukey decimation-in-time radix-2 FFT
	for (size = 2; size <= n; size *= 2) {
		size_t halfsize = size / 2;
		size_t tablestep = n / size;
		for (i = 0; i < n; i += size) {
			size_t j;
			size_t k;
			for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
				double tpre =  real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k];
				double tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k];
				real[j + halfsize] = real[j] - tpre;
				imag[j + halfsize] = imag[j] - tpim;
				real[j] += tpre;
				imag[j] += tpim;
			}
		}
		if (size == n)  // Prevent overflow in 'size *= 2'
			break;
	}
	status = 1;

cleanup:
	free(sin_table);
	free(cos_table);
	return status;
}
Chris@32 299
Chris@32 300
/*
 * In-place transform of the complex vector (real[], imag[]) of arbitrary
 * length n using Bluestein's chirp-z approach: the input is multiplied by a
 * chirp, convolved with the conjugate chirp over a power-of-2 length m, and
 * multiplied by the chirp again.
 *
 * Returns 1 on success; returns 0 if a size computation would overflow
 * size_t, if an allocation fails, or if the convolution fails. The input is
 * only overwritten once the convolution has succeeded.
 */
int transform_bluestein(double real[], double imag[], size_t n) {
	// Variables
	int status = 0;
	double *cos_table = NULL, *sin_table = NULL;
	double *areal = NULL, *aimag = NULL;
	double *breal = NULL, *bimag = NULL;
	double *creal = NULL, *cimag = NULL;
	size_t m;
	size_t size_n, size_m;
	size_t i;

	// Nothing to transform. Without this guard the preprocessing below would
	// read cos_table[0] and sin_table[0] from zero-byte allocations. This
	// matches how transform() treats n == 0.
	if (n == 0)
		return 1;

	// Find a power-of-2 convolution length m such that m >= n * 2 + 1
	{
		size_t target;
		if (n > (SIZE_MAX - 1) / 2)
			return 0;
		target = n * 2 + 1;
		for (m = 1; m < target; m *= 2) {
			if (SIZE_MAX / 2 < m)  // 'm *= 2' would overflow
				return 0;
		}
	}

	// Allocate memory
	if (SIZE_MAX / sizeof(double) < n || SIZE_MAX / sizeof(double) < m)
		return 0;
	size_n = n * sizeof(double);
	size_m = m * sizeof(double);
	cos_table = malloc(size_n);
	sin_table = malloc(size_n);
	// a and b must start zeroed: only part of each is written below
	areal = calloc(m, sizeof(double));
	aimag = calloc(m, sizeof(double));
	breal = calloc(m, sizeof(double));
	bimag = calloc(m, sizeof(double));
	creal = malloc(size_m);
	cimag = malloc(size_m);
	if (cos_table == NULL || sin_table == NULL
			|| areal == NULL || aimag == NULL
			|| breal == NULL || bimag == NULL
			|| creal == NULL || cimag == NULL)
		goto cleanup;

	// Trigonometric tables: angle pi * i^2 / n, with i^2 reduced modulo 2n in
	// integer arithmetic first so the argument passed to cos/sin stays small.
	for (i = 0; i < n; i++) {
		double temp = M_PI * (size_t)((unsigned long long)i * i % ((unsigned long long)n * 2)) / n;
		// Less accurate version if long long is unavailable: double temp = M_PI * i * i / n;
		cos_table[i] = cos(temp);
		sin_table[i] = sin(temp);
	}

	// Temporary vectors and preprocessing
	for (i = 0; i < n; i++) {
		areal[i] =  real[i] * cos_table[i] + imag[i] * sin_table[i];
		aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i];
	}
	breal[0] = cos_table[0];
	bimag[0] = sin_table[0];
	for (i = 1; i < n; i++) {
		// b is mirrored around index 0 (entries i and m - i)
		breal[i] = breal[m - i] = cos_table[i];
		bimag[i] = bimag[m - i] = sin_table[i];
	}

	// Convolution
	if (!convolve_complex(areal, aimag, breal, bimag, creal, cimag, m))
		goto cleanup;

	// Postprocessing
	for (i = 0; i < n; i++) {
		real[i] =  creal[i] * cos_table[i] + cimag[i] * sin_table[i];
		imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i];
	}
	status = 1;

	// Deallocation
cleanup:
	free(cimag);
	free(creal);
	free(bimag);
	free(breal);
	free(aimag);
	free(areal);
	free(sin_table);
	free(cos_table);
	return status;
}
Chris@32 386
Chris@32 387
/*
 * Convolves the real vectors x[] and y[] (both of length n) and stores the
 * real part of the result in out[]. The work is delegated to
 * convolve_complex() with all-zero imaginary inputs; the imaginary part of
 * the result goes into a scratch buffer and is discarded.
 *
 * Returns 1 on success, 0 if an allocation or the complex convolution fails.
 */
int convolve_real(const double x[], const double y[], double out[], size_t n) {
	int status = 0;

	// calloc supplies the zeroed imaginary parts
	double *zero_x = calloc(n, sizeof(double));
	double *zero_y = calloc(n, sizeof(double));
	double *scratch_imag = calloc(n, sizeof(double));

	if (zero_x != NULL && zero_y != NULL && scratch_imag != NULL)
		status = convolve_complex(x, zero_x, y, zero_y, out, scratch_imag, n);

	free(scratch_imag);
	free(zero_y);
	free(zero_x);
	return status;
}
Chris@32 404
Chris@32 405
/*
 * Circular convolution of two complex vectors of length n, computed by
 * transforming both inputs, multiplying them element by element and
 * transforming back. The inputs are copied first, so they are not modified.
 * The result is written to (outreal[], outimag[]).
 *
 * Returns 1 on success; returns 0 if the byte size overflows size_t, if an
 * allocation fails, or if any of the transforms fails. On failure the output
 * arrays are not written.
 */
int convolve_complex(const double xreal[], const double ximag[], const double yreal[], const double yimag[], double outreal[], double outimag[], size_t n) {
	int status = 0;

	if (SIZE_MAX / sizeof(double) < n)
		return 0;
	const size_t bytes = n * sizeof(double);

	// Working copies; the transforms operate in place.
	double *xr = memdup(xreal, bytes);
	double *xi = memdup(ximag, bytes);
	double *yr = memdup(yreal, bytes);
	double *yi = memdup(yimag, bytes);

	const int have_buffers = (xr != NULL && xi != NULL && yr != NULL && yi != NULL);

	// Short-circuit evaluation keeps the original order: y is transformed
	// only if transforming x succeeded.
	if (have_buffers && transform(xr, xi, n) && transform(yr, yi, n)) {
		// Element-wise complex product, stored back into x
		for (size_t k = 0; k < n; k++) {
			double product_re = xr[k] * yr[k] - xi[k] * yi[k];
			xi[k] = xi[k] * yr[k] + xr[k] * yi[k];
			xr[k] = product_re;
		}

		if (inverse_transform(xr, xi, n)) {
			// Scaling (because this FFT implementation omits it)
			for (size_t k = 0; k < n; k++) {
				outreal[k] = xr[k] / n;
				outimag[k] = xi[k] / n;
			}
			status = 1;
		}
	}

	free(yi);
	free(yr);
	free(xi);
	free(xr);
	return status;
}
Chris@32 445
Chris@32 446
/*
 * Returns the lowest n bits of x in reversed order: bit 0 of x becomes bit
 * n-1 of the result, bit 1 becomes bit n-2, and so on. Bits of x at position
 * n and above are ignored. With n == 0 the result is 0.
 */
static size_t reverse_bits(size_t x, unsigned int n) {
	size_t reversed = 0;
	while (n-- > 0) {
		// Move the lowest remaining bit of x onto the low end of the result
		reversed <<= 1;
		reversed |= (x & 1);
		x >>= 1;
	}
	return reversed;
}
Chris@32 454
Chris@32 455
/*
 * Returns a newly malloc'ed copy of the first n bytes at src, or NULL if the
 * allocation fails. The caller owns the copy and releases it with free().
 */
static void *memdup(const void *src, size_t n) {
	void *copy = malloc(n);
	if (copy == NULL)
		return NULL;
	// memcpy returns its destination pointer
	return memcpy(copy, src, n);
}