Chris@32: /* Chris@32: * Free FFT and convolution (C) Chris@32: * Chris@32: * Copyright (c) 2014 Project Nayuki Chris@32: * http://www.nayuki.io/page/free-small-fft-in-multiple-languages Chris@32: * Chris@32: * (MIT License) Chris@32: * Permission is hereby granted, free of charge, to any person obtaining a copy of Chris@32: * this software and associated documentation files (the "Software"), to deal in Chris@32: * the Software without restriction, including without limitation the rights to Chris@32: * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of Chris@32: * the Software, and to permit persons to whom the Software is furnished to do so, Chris@32: * subject to the following conditions: Chris@32: * - The above copyright notice and this permission notice shall be included in Chris@32: * all copies or substantial portions of the Software. Chris@32: * - The Software is provided "as is", without warranty of any kind, express or Chris@32: * implied, including but not limited to the warranties of merchantability, Chris@32: * fitness for a particular purpose and noninfringement. In no event shall the Chris@32: * authors or copyright holders be liable for any claim, damages or other Chris@32: * liability, whether in an action of contract, tort or otherwise, arising from, Chris@32: * out of or in connection with the Software or the use or other dealings in the Chris@32: * Software. Chris@32: */ Chris@32: Chris@32: #include Chris@32: #include Chris@32: #include Chris@32: #include Chris@32: #include "fft.h" Chris@32: Chris@32: Chris@32: // Private function prototypes Chris@32: static size_t reverse_bits(size_t x, unsigned int n); Chris@32: static void *memdup(const void *src, size_t n); Chris@32: Chris@32: #define SIZE_MAX ((size_t)-1) Chris@32: Chris@32: Chris@32: int transform(double real[], double imag[], size_t n) { Chris@32: if (n == 0) Chris@32: return 1; Chris@32: else if ((n & (n - 1)) == 0) // Is power of 2 Chris@32: return transform_radix2(real, imag, n); Chris@32: else // More complicated algorithm for arbitrary sizes Chris@32: return transform_bluestein(real, imag, n); Chris@32: } Chris@32: Chris@32: Chris@32: int inverse_transform(double real[], double imag[], size_t n) { Chris@32: return transform(imag, real, n); Chris@32: } Chris@32: Chris@32: tables *precalc(size_t n) { Chris@32: unsigned int levels; Chris@32: // Compute levels = floor(log2(n)) Chris@32: { Chris@32: size_t temp = n; Chris@32: levels = 0; Chris@32: while (temp > 1) { Chris@32: levels++; Chris@32: temp >>= 1; Chris@32: } Chris@32: if (1u << levels != n) Chris@32: return 0; // n is not a power of 2 Chris@32: } Chris@32: if (SIZE_MAX / sizeof(double) < n / 2) return 0; Chris@32: tables *tables = malloc(sizeof(tables)); Chris@32: if (!tables) return tables; Chris@32: tables->levels = levels; Chris@32: size_t size = (n / 2) * sizeof(double); Chris@32: tables->cos = malloc(size); Chris@32: if (!tables->cos) { Chris@32: free(tables); Chris@32: return 0; Chris@32: } Chris@32: tables->sin = malloc(size); Chris@32: if (!tables->sin) { Chris@32: free(tables->cos); Chris@32: free(tables); Chris@32: return 0; Chris@32: } Chris@32: int i; Chris@32: for (i = 0; i < n / 2; i++) { Chris@32: tables->cos[i] = cos(2 * M_PI * i / n); Chris@32: tables->sin[i] = sin(2 * M_PI * i / n); Chris@32: } Chris@32: return tables; Chris@32: } Chris@32: Chris@33: tables_f *precalc_f(size_t n) { Chris@33: unsigned int levels; Chris@33: // Compute levels = floor(log2(n)) Chris@33: { Chris@33: size_t temp = n; Chris@33: levels = 0; Chris@33: while (temp > 1) { Chris@33: levels++; Chris@33: temp >>= 1; Chris@33: } Chris@33: if (1u << levels != n) Chris@33: return 0; // n is not a power of 2 Chris@33: } Chris@33: if (SIZE_MAX / sizeof(float) < n / 2) return 0; Chris@33: tables_f *tables = malloc(sizeof(tables_f)); Chris@33: if (!tables) return tables; Chris@33: tables->levels = levels; Chris@33: size_t size = (n / 2) * sizeof(float); Chris@33: tables->cos = malloc(size); Chris@33: if (!tables->cos) { Chris@33: free(tables); Chris@33: return 0; Chris@33: } Chris@33: tables->sin = malloc(size); Chris@33: if (!tables->sin) { Chris@33: free(tables->cos); Chris@33: free(tables); Chris@33: return 0; Chris@33: } Chris@33: int i; Chris@33: for (i = 0; i < n / 2; i++) { Chris@33: tables->cos[i] = cos(2 * M_PI * i / n); Chris@33: tables->sin[i] = sin(2 * M_PI * i / n); Chris@33: } Chris@33: return tables; Chris@33: } Chris@33: Chris@32: void dispose(tables *tables) { Chris@32: if (!tables) return; Chris@32: free(tables->cos); Chris@32: free(tables->sin); Chris@32: free(tables); Chris@32: } Chris@32: Chris@33: void dispose_f(tables_f *tables) { Chris@33: if (!tables) return; Chris@33: free(tables->cos); Chris@33: free(tables->sin); Chris@33: free(tables); Chris@33: } Chris@33: Chris@32: void transform_radix2_precalc(double real[], double imag[], int n, tables *tables) { Chris@32: double *cos_table, *sin_table; Chris@32: int size; Chris@32: int i; Chris@32: Chris@32: // Trignometric tables Chris@32: cos_table = tables->cos; Chris@32: sin_table = tables->sin; Chris@32: Chris@32: // Bit-reversed addressing permutation Chris@32: for (i = 0; i < n; i++) { Chris@32: int j = reverse_bits(i, tables->levels); Chris@32: if (j > i) { Chris@32: double temp = real[i]; Chris@32: real[i] = real[j]; Chris@32: real[j] = temp; Chris@32: temp = imag[i]; Chris@32: imag[i] = imag[j]; Chris@32: imag[j] = temp; Chris@32: } Chris@32: } Chris@32: Chris@32: // Cooley-Tukey decimation-in-time radix-2 FFT Chris@32: for (size = 2; size <= n; size *= 2) { Chris@32: int halfsize = size / 2; Chris@32: int tablestep = n / size; Chris@32: for (i = 0; i < n; i += size) { Chris@32: int j; Chris@32: int k; Chris@32: for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) { Chris@32: double tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k]; Chris@32: double tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k]; Chris@32: real[j + halfsize] = real[j] - tpre; Chris@32: imag[j + halfsize] = imag[j] - tpim; Chris@32: real[j] += tpre; Chris@32: imag[j] += tpim; Chris@32: } Chris@32: } Chris@32: if (size == n) // Prevent overflow in 'size *= 2' Chris@32: break; Chris@32: } Chris@32: } Chris@32: Chris@33: void transform_radix2_precalc_f(float real[], float imag[], int n, tables_f *tables) { Chris@33: float *cos_table, *sin_table; Chris@33: int size; Chris@33: int i; Chris@33: Chris@33: // Trignometric tables Chris@33: cos_table = tables->cos; Chris@33: sin_table = tables->sin; Chris@33: Chris@33: // Bit-reversed addressing permutation Chris@33: for (i = 0; i < n; i++) { Chris@33: int j = reverse_bits(i, tables->levels); Chris@33: if (j > i) { Chris@33: float temp = real[i]; Chris@33: real[i] = real[j]; Chris@33: real[j] = temp; Chris@33: temp = imag[i]; Chris@33: imag[i] = imag[j]; Chris@33: imag[j] = temp; Chris@33: } Chris@33: } Chris@33: Chris@33: // Cooley-Tukey decimation-in-time radix-2 FFT Chris@33: for (size = 2; size <= n; size *= 2) { Chris@33: int halfsize = size / 2; Chris@33: int tablestep = n / size; Chris@33: for (i = 0; i < n; i += size) { Chris@33: int j; Chris@33: int k; Chris@33: for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) { Chris@33: float tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k]; Chris@33: float tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k]; Chris@33: real[j + halfsize] = real[j] - tpre; Chris@33: imag[j + halfsize] = imag[j] - tpim; Chris@33: real[j] += tpre; Chris@33: imag[j] += tpim; Chris@33: } Chris@33: } Chris@33: if (size == n) // Prevent overflow in 'size *= 2' Chris@33: break; Chris@33: } Chris@33: } Chris@33: Chris@32: int transform_radix2(double real[], double imag[], size_t n) { Chris@32: // Variables Chris@32: int status = 0; Chris@32: unsigned int levels; Chris@32: double *cos_table, *sin_table; Chris@32: size_t size; Chris@32: size_t i; Chris@32: Chris@32: // Compute levels = floor(log2(n)) Chris@32: { Chris@32: size_t temp = n; Chris@32: levels = 0; Chris@32: while (temp > 1) { Chris@32: levels++; Chris@32: temp >>= 1; Chris@32: } Chris@32: if (1u << levels != n) Chris@32: return 0; // n is not a power of 2 Chris@32: } Chris@32: Chris@32: // Trignometric tables Chris@32: if (SIZE_MAX / sizeof(double) < n / 2) Chris@32: return 0; Chris@32: size = (n / 2) * sizeof(double); Chris@32: cos_table = malloc(size); Chris@32: sin_table = malloc(size); Chris@32: if (cos_table == NULL || sin_table == NULL) Chris@32: goto cleanup; Chris@32: for (i = 0; i < n / 2; i++) { Chris@32: cos_table[i] = cos(2 * M_PI * i / n); Chris@32: sin_table[i] = sin(2 * M_PI * i / n); Chris@32: } Chris@32: Chris@32: // Bit-reversed addressing permutation Chris@32: for (i = 0; i < n; i++) { Chris@32: size_t j = reverse_bits(i, levels); Chris@32: if (j > i) { Chris@32: double temp = real[i]; Chris@32: real[i] = real[j]; Chris@32: real[j] = temp; Chris@32: temp = imag[i]; Chris@32: imag[i] = imag[j]; Chris@32: imag[j] = temp; Chris@32: } Chris@32: } Chris@32: Chris@32: // Cooley-Tukey decimation-in-time radix-2 FFT Chris@32: for (size = 2; size <= n; size *= 2) { Chris@32: size_t halfsize = size / 2; Chris@32: size_t tablestep = n / size; Chris@32: for (i = 0; i < n; i += size) { Chris@32: size_t j; Chris@32: size_t k; Chris@32: for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) { Chris@32: double tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k]; Chris@32: double tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k]; Chris@32: real[j + halfsize] = real[j] - tpre; Chris@32: imag[j + halfsize] = imag[j] - tpim; Chris@32: real[j] += tpre; Chris@32: imag[j] += tpim; Chris@32: } Chris@32: } Chris@32: if (size == n) // Prevent overflow in 'size *= 2' Chris@32: break; Chris@32: } Chris@32: status = 1; Chris@32: Chris@32: cleanup: Chris@32: free(sin_table); Chris@32: free(cos_table); Chris@32: return status; Chris@32: } Chris@32: Chris@32: Chris@32: int transform_bluestein(double real[], double imag[], size_t n) { Chris@32: // Variables Chris@32: int status = 0; Chris@32: double *cos_table, *sin_table; Chris@32: double *areal, *aimag; Chris@32: double *breal, *bimag; Chris@32: double *creal, *cimag; Chris@32: size_t m; Chris@32: size_t size_n, size_m; Chris@32: size_t i; Chris@32: Chris@32: // Find a power-of-2 convolution length m such that m >= n * 2 + 1 Chris@32: { Chris@32: size_t target; Chris@32: if (n > (SIZE_MAX - 1) / 2) Chris@32: return 0; Chris@32: target = n * 2 + 1; Chris@32: for (m = 1; m < target; m *= 2) { Chris@32: if (SIZE_MAX / 2 < m) Chris@32: return 0; Chris@32: } Chris@32: } Chris@32: Chris@32: // Allocate memory Chris@32: if (SIZE_MAX / sizeof(double) < n || SIZE_MAX / sizeof(double) < m) Chris@32: return 0; Chris@32: size_n = n * sizeof(double); Chris@32: size_m = m * sizeof(double); Chris@32: cos_table = malloc(size_n); Chris@32: sin_table = malloc(size_n); Chris@32: areal = calloc(m, sizeof(double)); Chris@32: aimag = calloc(m, sizeof(double)); Chris@32: breal = calloc(m, sizeof(double)); Chris@32: bimag = calloc(m, sizeof(double)); Chris@32: creal = malloc(size_m); Chris@32: cimag = malloc(size_m); Chris@32: if (cos_table == NULL || sin_table == NULL Chris@32: || areal == NULL || aimag == NULL Chris@32: || breal == NULL || bimag == NULL Chris@32: || creal == NULL || cimag == NULL) Chris@32: goto cleanup; Chris@32: Chris@32: // Trignometric tables Chris@32: for (i = 0; i < n; i++) { Chris@32: double temp = M_PI * (size_t)((unsigned long long)i * i % ((unsigned long long)n * 2)) / n; Chris@32: // Less accurate version if long long is unavailable: double temp = M_PI * i * i / n; Chris@32: cos_table[i] = cos(temp); Chris@32: sin_table[i] = sin(temp); Chris@32: } Chris@32: Chris@32: // Temporary vectors and preprocessing Chris@32: for (i = 0; i < n; i++) { Chris@32: areal[i] = real[i] * cos_table[i] + imag[i] * sin_table[i]; Chris@32: aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i]; Chris@32: } Chris@32: breal[0] = cos_table[0]; Chris@32: bimag[0] = sin_table[0]; Chris@32: for (i = 1; i < n; i++) { Chris@32: breal[i] = breal[m - i] = cos_table[i]; Chris@32: bimag[i] = bimag[m - i] = sin_table[i]; Chris@32: } Chris@32: Chris@32: // Convolution Chris@32: if (!convolve_complex(areal, aimag, breal, bimag, creal, cimag, m)) Chris@32: goto cleanup; Chris@32: Chris@32: // Postprocessing Chris@32: for (i = 0; i < n; i++) { Chris@32: real[i] = creal[i] * cos_table[i] + cimag[i] * sin_table[i]; Chris@32: imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i]; Chris@32: } Chris@32: status = 1; Chris@32: Chris@32: // Deallocation Chris@32: cleanup: Chris@32: free(cimag); Chris@32: free(creal); Chris@32: free(bimag); Chris@32: free(breal); Chris@32: free(aimag); Chris@32: free(areal); Chris@32: free(sin_table); Chris@32: free(cos_table); Chris@32: return status; Chris@32: } Chris@32: Chris@32: Chris@32: int convolve_real(const double x[], const double y[], double out[], size_t n) { Chris@32: double *ximag, *yimag, *zimag; Chris@32: int status = 0; Chris@32: ximag = calloc(n, sizeof(double)); Chris@32: yimag = calloc(n, sizeof(double)); Chris@32: zimag = calloc(n, sizeof(double)); Chris@32: if (ximag == NULL || yimag == NULL || zimag == NULL) Chris@32: goto cleanup; Chris@32: Chris@32: status = convolve_complex(x, ximag, y, yimag, out, zimag, n); Chris@32: cleanup: Chris@32: free(zimag); Chris@32: free(yimag); Chris@32: free(ximag); Chris@32: return status; Chris@32: } Chris@32: Chris@32: Chris@32: int convolve_complex(const double xreal[], const double ximag[], const double yreal[], const double yimag[], double outreal[], double outimag[], size_t n) { Chris@32: int status = 0; Chris@32: size_t size; Chris@32: size_t i; Chris@32: double *xr, *xi, *yr, *yi; Chris@32: if (SIZE_MAX / sizeof(double) < n) Chris@32: return 0; Chris@32: size = n * sizeof(double); Chris@32: xr = memdup(xreal, size); Chris@32: xi = memdup(ximag, size); Chris@32: yr = memdup(yreal, size); Chris@32: yi = memdup(yimag, size); Chris@32: if (xr == NULL || xi == NULL || yr == NULL || yi == NULL) Chris@32: goto cleanup; Chris@32: Chris@32: if (!transform(xr, xi, n)) Chris@32: goto cleanup; Chris@32: if (!transform(yr, yi, n)) Chris@32: goto cleanup; Chris@32: for (i = 0; i < n; i++) { Chris@32: double temp = xr[i] * yr[i] - xi[i] * yi[i]; Chris@32: xi[i] = xi[i] * yr[i] + xr[i] * yi[i]; Chris@32: xr[i] = temp; Chris@32: } Chris@32: if (!inverse_transform(xr, xi, n)) Chris@32: goto cleanup; Chris@32: for (i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it) Chris@32: outreal[i] = xr[i] / n; Chris@32: outimag[i] = xi[i] / n; Chris@32: } Chris@32: status = 1; Chris@32: Chris@32: cleanup: Chris@32: free(yi); Chris@32: free(yr); Chris@32: free(xi); Chris@32: free(xr); Chris@32: return status; Chris@32: } Chris@32: Chris@32: Chris@32: static size_t reverse_bits(size_t x, unsigned int n) { Chris@32: size_t result = 0; Chris@32: unsigned int i; Chris@32: for (i = 0; i < n; i++, x >>= 1) Chris@32: result = (result << 1) | (x & 1); Chris@32: return result; Chris@32: } Chris@32: Chris@32: Chris@32: static void *memdup(const void *src, size_t n) { Chris@32: void *dest = malloc(n); Chris@32: if (dest != NULL) Chris@32: memcpy(dest, src, n); Chris@32: return dest; Chris@32: }