Chris@32
|
1 /*
|
Chris@32
|
2 * Free FFT and convolution (C)
|
Chris@32
|
3 *
|
Chris@32
|
4 * Copyright (c) 2014 Project Nayuki
|
Chris@32
|
5 * http://www.nayuki.io/page/free-small-fft-in-multiple-languages
|
Chris@32
|
6 *
|
Chris@32
|
7 * (MIT License)
|
Chris@32
|
8 * Permission is hereby granted, free of charge, to any person obtaining a copy of
|
Chris@32
|
9 * this software and associated documentation files (the "Software"), to deal in
|
Chris@32
|
10 * the Software without restriction, including without limitation the rights to
|
Chris@32
|
11 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
Chris@32
|
12 * the Software, and to permit persons to whom the Software is furnished to do so,
|
Chris@32
|
13 * subject to the following conditions:
|
Chris@32
|
14 * - The above copyright notice and this permission notice shall be included in
|
Chris@32
|
15 * all copies or substantial portions of the Software.
|
Chris@32
|
16 * - The Software is provided "as is", without warranty of any kind, express or
|
Chris@32
|
17 * implied, including but not limited to the warranties of merchantability,
|
Chris@32
|
18 * fitness for a particular purpose and noninfringement. In no event shall the
|
Chris@32
|
19 * authors or copyright holders be liable for any claim, damages or other
|
Chris@32
|
20 * liability, whether in an action of contract, tort or otherwise, arising from,
|
Chris@32
|
21 * out of or in connection with the Software or the use or other dealings in the
|
Chris@32
|
22 * Software.
|
Chris@32
|
23 */
|
Chris@32
|
24
|
Chris@32
|
25 #include <math.h>
|
Chris@32
|
26 #include <stdlib.h>
|
Chris@32
|
27 #include <string.h>
|
Chris@32
|
28 #include <stdio.h>
|
Chris@32
|
29 #include "fft.h"
|
Chris@32
|
30
|
Chris@32
|
31
|
Chris@32
|
// Private function prototypes
static size_t reverse_bits(size_t x, unsigned int n);
static void *memdup(const void *src, size_t n);

// Largest value a size_t can hold; used by the allocation overflow checks.
// <stdint.h> already defines this macro, so only provide a fallback when it
// is missing instead of redefining a standard macro unconditionally.
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)-1)
#endif
|
Chris@32
|
37
|
Chris@32
|
38
|
Chris@32
|
/*
 * Forward transform of the n-point complex vector (real[], imag[]).
 * A zero-length vector is reported as success without doing anything.
 * Power-of-2 lengths are handed to transform_radix2(); every other length
 * is handed to transform_bluestein(). The return value is the status of
 * whichever routine was selected (1 for n == 0).
 */
int transform(double real[], double imag[], size_t n) {
	if (n == 0)
		return 1;

	// A power of 2 has exactly one bit set, so clearing the lowest set bit
	// leaves zero.
	int is_power_of_2 = (n & (n - 1)) == 0;
	if (!is_power_of_2)
		return transform_bluestein(real, imag, n);  // Arbitrary sizes
	return transform_radix2(real, imag, n);
}
|
Chris@32
|
47
|
Chris@32
|
48
|
Chris@32
|
/*
 * Inverse transform of the n-point complex vector (real[], imag[]),
 * implemented by running the forward transform with the roles of the real
 * and imaginary arrays exchanged. No scaling is applied here.
 * Returns the status reported by transform().
 */
int inverse_transform(double real[], double imag[], size_t n) {
	int status = transform(imag, real, n);
	return status;
}
|
Chris@32
|
52
|
Chris@32
|
53 tables *precalc(size_t n) {
|
Chris@32
|
54 unsigned int levels;
|
Chris@32
|
55 // Compute levels = floor(log2(n))
|
Chris@32
|
56 {
|
Chris@32
|
57 size_t temp = n;
|
Chris@32
|
58 levels = 0;
|
Chris@32
|
59 while (temp > 1) {
|
Chris@32
|
60 levels++;
|
Chris@32
|
61 temp >>= 1;
|
Chris@32
|
62 }
|
Chris@32
|
63 if (1u << levels != n)
|
Chris@32
|
64 return 0; // n is not a power of 2
|
Chris@32
|
65 }
|
Chris@32
|
66 if (SIZE_MAX / sizeof(double) < n / 2) return 0;
|
Chris@32
|
67 tables *tables = malloc(sizeof(tables));
|
Chris@32
|
68 if (!tables) return tables;
|
Chris@32
|
69 tables->levels = levels;
|
Chris@32
|
70 size_t size = (n / 2) * sizeof(double);
|
Chris@32
|
71 tables->cos = malloc(size);
|
Chris@32
|
72 if (!tables->cos) {
|
Chris@32
|
73 free(tables);
|
Chris@32
|
74 return 0;
|
Chris@32
|
75 }
|
Chris@32
|
76 tables->sin = malloc(size);
|
Chris@32
|
77 if (!tables->sin) {
|
Chris@32
|
78 free(tables->cos);
|
Chris@32
|
79 free(tables);
|
Chris@32
|
80 return 0;
|
Chris@32
|
81 }
|
Chris@32
|
82 int i;
|
Chris@32
|
83 for (i = 0; i < n / 2; i++) {
|
Chris@32
|
84 tables->cos[i] = cos(2 * M_PI * i / n);
|
Chris@32
|
85 tables->sin[i] = sin(2 * M_PI * i / n);
|
Chris@32
|
86 }
|
Chris@32
|
87 return tables;
|
Chris@32
|
88 }
|
Chris@32
|
89
|
Chris@32
|
90 void dispose(tables *tables) {
|
Chris@32
|
91 if (!tables) return;
|
Chris@32
|
92 free(tables->cos);
|
Chris@32
|
93 free(tables->sin);
|
Chris@32
|
94 free(tables);
|
Chris@32
|
95 }
|
Chris@32
|
96
|
Chris@32
|
97 void transform_radix2_precalc(double real[], double imag[], int n, tables *tables) {
|
Chris@32
|
98 double *cos_table, *sin_table;
|
Chris@32
|
99 int size;
|
Chris@32
|
100 int i;
|
Chris@32
|
101
|
Chris@32
|
102 // Trignometric tables
|
Chris@32
|
103 cos_table = tables->cos;
|
Chris@32
|
104 sin_table = tables->sin;
|
Chris@32
|
105
|
Chris@32
|
106 // Bit-reversed addressing permutation
|
Chris@32
|
107 for (i = 0; i < n; i++) {
|
Chris@32
|
108 int j = reverse_bits(i, tables->levels);
|
Chris@32
|
109 if (j > i) {
|
Chris@32
|
110 double temp = real[i];
|
Chris@32
|
111 real[i] = real[j];
|
Chris@32
|
112 real[j] = temp;
|
Chris@32
|
113 temp = imag[i];
|
Chris@32
|
114 imag[i] = imag[j];
|
Chris@32
|
115 imag[j] = temp;
|
Chris@32
|
116 }
|
Chris@32
|
117 }
|
Chris@32
|
118
|
Chris@32
|
119 // Cooley-Tukey decimation-in-time radix-2 FFT
|
Chris@32
|
120 for (size = 2; size <= n; size *= 2) {
|
Chris@32
|
121 int halfsize = size / 2;
|
Chris@32
|
122 int tablestep = n / size;
|
Chris@32
|
123 for (i = 0; i < n; i += size) {
|
Chris@32
|
124 int j;
|
Chris@32
|
125 int k;
|
Chris@32
|
126 for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
|
Chris@32
|
127 double tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k];
|
Chris@32
|
128 double tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k];
|
Chris@32
|
129 real[j + halfsize] = real[j] - tpre;
|
Chris@32
|
130 imag[j + halfsize] = imag[j] - tpim;
|
Chris@32
|
131 real[j] += tpre;
|
Chris@32
|
132 imag[j] += tpim;
|
Chris@32
|
133 }
|
Chris@32
|
134 }
|
Chris@32
|
135 if (size == n) // Prevent overflow in 'size *= 2'
|
Chris@32
|
136 break;
|
Chris@32
|
137 }
|
Chris@32
|
138 }
|
Chris@32
|
139
|
Chris@32
|
140 int transform_radix2(double real[], double imag[], size_t n) {
|
Chris@32
|
141 // Variables
|
Chris@32
|
142 int status = 0;
|
Chris@32
|
143 unsigned int levels;
|
Chris@32
|
144 double *cos_table, *sin_table;
|
Chris@32
|
145 size_t size;
|
Chris@32
|
146 size_t i;
|
Chris@32
|
147
|
Chris@32
|
148 // Compute levels = floor(log2(n))
|
Chris@32
|
149 {
|
Chris@32
|
150 size_t temp = n;
|
Chris@32
|
151 levels = 0;
|
Chris@32
|
152 while (temp > 1) {
|
Chris@32
|
153 levels++;
|
Chris@32
|
154 temp >>= 1;
|
Chris@32
|
155 }
|
Chris@32
|
156 if (1u << levels != n)
|
Chris@32
|
157 return 0; // n is not a power of 2
|
Chris@32
|
158 }
|
Chris@32
|
159
|
Chris@32
|
160 // Trignometric tables
|
Chris@32
|
161 if (SIZE_MAX / sizeof(double) < n / 2)
|
Chris@32
|
162 return 0;
|
Chris@32
|
163 size = (n / 2) * sizeof(double);
|
Chris@32
|
164 cos_table = malloc(size);
|
Chris@32
|
165 sin_table = malloc(size);
|
Chris@32
|
166 if (cos_table == NULL || sin_table == NULL)
|
Chris@32
|
167 goto cleanup;
|
Chris@32
|
168 for (i = 0; i < n / 2; i++) {
|
Chris@32
|
169 cos_table[i] = cos(2 * M_PI * i / n);
|
Chris@32
|
170 sin_table[i] = sin(2 * M_PI * i / n);
|
Chris@32
|
171 }
|
Chris@32
|
172
|
Chris@32
|
173 // Bit-reversed addressing permutation
|
Chris@32
|
174 for (i = 0; i < n; i++) {
|
Chris@32
|
175 size_t j = reverse_bits(i, levels);
|
Chris@32
|
176 if (j > i) {
|
Chris@32
|
177 double temp = real[i];
|
Chris@32
|
178 real[i] = real[j];
|
Chris@32
|
179 real[j] = temp;
|
Chris@32
|
180 temp = imag[i];
|
Chris@32
|
181 imag[i] = imag[j];
|
Chris@32
|
182 imag[j] = temp;
|
Chris@32
|
183 }
|
Chris@32
|
184 }
|
Chris@32
|
185
|
Chris@32
|
186 // Cooley-Tukey decimation-in-time radix-2 FFT
|
Chris@32
|
187 for (size = 2; size <= n; size *= 2) {
|
Chris@32
|
188 size_t halfsize = size / 2;
|
Chris@32
|
189 size_t tablestep = n / size;
|
Chris@32
|
190 for (i = 0; i < n; i += size) {
|
Chris@32
|
191 size_t j;
|
Chris@32
|
192 size_t k;
|
Chris@32
|
193 for (j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
|
Chris@32
|
194 double tpre = real[j+halfsize] * cos_table[k] + imag[j+halfsize] * sin_table[k];
|
Chris@32
|
195 double tpim = -real[j+halfsize] * sin_table[k] + imag[j+halfsize] * cos_table[k];
|
Chris@32
|
196 real[j + halfsize] = real[j] - tpre;
|
Chris@32
|
197 imag[j + halfsize] = imag[j] - tpim;
|
Chris@32
|
198 real[j] += tpre;
|
Chris@32
|
199 imag[j] += tpim;
|
Chris@32
|
200 }
|
Chris@32
|
201 }
|
Chris@32
|
202 if (size == n) // Prevent overflow in 'size *= 2'
|
Chris@32
|
203 break;
|
Chris@32
|
204 }
|
Chris@32
|
205 status = 1;
|
Chris@32
|
206
|
Chris@32
|
207 cleanup:
|
Chris@32
|
208 free(sin_table);
|
Chris@32
|
209 free(cos_table);
|
Chris@32
|
210 return status;
|
Chris@32
|
211 }
|
Chris@32
|
212
|
Chris@32
|
213
|
Chris@32
|
214 int transform_bluestein(double real[], double imag[], size_t n) {
|
Chris@32
|
215 // Variables
|
Chris@32
|
216 int status = 0;
|
Chris@32
|
217 double *cos_table, *sin_table;
|
Chris@32
|
218 double *areal, *aimag;
|
Chris@32
|
219 double *breal, *bimag;
|
Chris@32
|
220 double *creal, *cimag;
|
Chris@32
|
221 size_t m;
|
Chris@32
|
222 size_t size_n, size_m;
|
Chris@32
|
223 size_t i;
|
Chris@32
|
224
|
Chris@32
|
225 // Find a power-of-2 convolution length m such that m >= n * 2 + 1
|
Chris@32
|
226 {
|
Chris@32
|
227 size_t target;
|
Chris@32
|
228 if (n > (SIZE_MAX - 1) / 2)
|
Chris@32
|
229 return 0;
|
Chris@32
|
230 target = n * 2 + 1;
|
Chris@32
|
231 for (m = 1; m < target; m *= 2) {
|
Chris@32
|
232 if (SIZE_MAX / 2 < m)
|
Chris@32
|
233 return 0;
|
Chris@32
|
234 }
|
Chris@32
|
235 }
|
Chris@32
|
236
|
Chris@32
|
237 // Allocate memory
|
Chris@32
|
238 if (SIZE_MAX / sizeof(double) < n || SIZE_MAX / sizeof(double) < m)
|
Chris@32
|
239 return 0;
|
Chris@32
|
240 size_n = n * sizeof(double);
|
Chris@32
|
241 size_m = m * sizeof(double);
|
Chris@32
|
242 cos_table = malloc(size_n);
|
Chris@32
|
243 sin_table = malloc(size_n);
|
Chris@32
|
244 areal = calloc(m, sizeof(double));
|
Chris@32
|
245 aimag = calloc(m, sizeof(double));
|
Chris@32
|
246 breal = calloc(m, sizeof(double));
|
Chris@32
|
247 bimag = calloc(m, sizeof(double));
|
Chris@32
|
248 creal = malloc(size_m);
|
Chris@32
|
249 cimag = malloc(size_m);
|
Chris@32
|
250 if (cos_table == NULL || sin_table == NULL
|
Chris@32
|
251 || areal == NULL || aimag == NULL
|
Chris@32
|
252 || breal == NULL || bimag == NULL
|
Chris@32
|
253 || creal == NULL || cimag == NULL)
|
Chris@32
|
254 goto cleanup;
|
Chris@32
|
255
|
Chris@32
|
256 // Trignometric tables
|
Chris@32
|
257 for (i = 0; i < n; i++) {
|
Chris@32
|
258 double temp = M_PI * (size_t)((unsigned long long)i * i % ((unsigned long long)n * 2)) / n;
|
Chris@32
|
259 // Less accurate version if long long is unavailable: double temp = M_PI * i * i / n;
|
Chris@32
|
260 cos_table[i] = cos(temp);
|
Chris@32
|
261 sin_table[i] = sin(temp);
|
Chris@32
|
262 }
|
Chris@32
|
263
|
Chris@32
|
264 // Temporary vectors and preprocessing
|
Chris@32
|
265 for (i = 0; i < n; i++) {
|
Chris@32
|
266 areal[i] = real[i] * cos_table[i] + imag[i] * sin_table[i];
|
Chris@32
|
267 aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i];
|
Chris@32
|
268 }
|
Chris@32
|
269 breal[0] = cos_table[0];
|
Chris@32
|
270 bimag[0] = sin_table[0];
|
Chris@32
|
271 for (i = 1; i < n; i++) {
|
Chris@32
|
272 breal[i] = breal[m - i] = cos_table[i];
|
Chris@32
|
273 bimag[i] = bimag[m - i] = sin_table[i];
|
Chris@32
|
274 }
|
Chris@32
|
275
|
Chris@32
|
276 // Convolution
|
Chris@32
|
277 if (!convolve_complex(areal, aimag, breal, bimag, creal, cimag, m))
|
Chris@32
|
278 goto cleanup;
|
Chris@32
|
279
|
Chris@32
|
280 // Postprocessing
|
Chris@32
|
281 for (i = 0; i < n; i++) {
|
Chris@32
|
282 real[i] = creal[i] * cos_table[i] + cimag[i] * sin_table[i];
|
Chris@32
|
283 imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i];
|
Chris@32
|
284 }
|
Chris@32
|
285 status = 1;
|
Chris@32
|
286
|
Chris@32
|
287 // Deallocation
|
Chris@32
|
288 cleanup:
|
Chris@32
|
289 free(cimag);
|
Chris@32
|
290 free(creal);
|
Chris@32
|
291 free(bimag);
|
Chris@32
|
292 free(breal);
|
Chris@32
|
293 free(aimag);
|
Chris@32
|
294 free(areal);
|
Chris@32
|
295 free(sin_table);
|
Chris@32
|
296 free(cos_table);
|
Chris@32
|
297 return status;
|
Chris@32
|
298 }
|
Chris@32
|
299
|
Chris@32
|
300
|
Chris@32
|
/*
 * Convolution of two real n-point vectors x[] and y[], written to out[].
 * It is carried out by convolve_complex() with all-zero imaginary parts;
 * the imaginary part of the result is discarded.
 * Returns the status of convolve_complex(), or 0 if a temporary buffer
 * cannot be allocated.
 */
int convolve_real(const double x[], const double y[], double out[], size_t n) {
	int status = 0;
	double *ximag = calloc(n, sizeof(double));
	double *yimag = calloc(n, sizeof(double));
	double *zimag = calloc(n, sizeof(double));

	// Only run the convolution when every buffer was obtained; the frees
	// below are reached either way.
	if (ximag != NULL && yimag != NULL && zimag != NULL)
		status = convolve_complex(x, ximag, y, yimag, out, zimag, n);

	free(zimag);
	free(yimag);
	free(ximag);
	return status;
}
|
Chris@32
|
317
|
Chris@32
|
318
|
Chris@32
|
/*
 * Convolution of two complex n-point vectors (xreal, ximag) and
 * (yreal, yimag), written to (outreal, outimag). Both inputs are copied,
 * transformed, multiplied element by element, inverse-transformed and
 * divided by n. The input arrays are not modified.
 * Returns 1 on success; 0 if n * sizeof(double) would overflow, if a copy
 * cannot be allocated, or if any transform fails.
 */
int convolve_complex(const double xreal[], const double ximag[], const double yreal[], const double yimag[], double outreal[], double outimag[], size_t n) {
	if (SIZE_MAX / sizeof(double) < n)
		return 0;

	int status = 0;
	const size_t bytes = n * sizeof(double);

	// Work on private copies so the inputs stay untouched
	double *xr = memdup(xreal, bytes);
	double *xi = memdup(ximag, bytes);
	double *yr = memdup(yreal, bytes);
	double *yi = memdup(yimag, bytes);
	if (!xr || !xi || !yr || !yi)
		goto cleanup;

	// Forward transforms; the second is skipped if the first one fails
	if (!transform(xr, xi, n) || !transform(yr, yi, n))
		goto cleanup;

	// Pointwise complex product, stored back into (xr, xi)
	for (size_t k = 0; k < n; k++) {
		double re = xr[k] * yr[k] - xi[k] * yi[k];
		xi[k] = xi[k] * yr[k] + xr[k] * yi[k];
		xr[k] = re;
	}

	if (!inverse_transform(xr, xi, n))
		goto cleanup;

	// Scaling (because this FFT implementation omits it)
	for (size_t k = 0; k < n; k++) {
		outreal[k] = xr[k] / n;
		outimag[k] = xi[k] / n;
	}
	status = 1;

cleanup:
	free(yi);
	free(yr);
	free(xi);
	free(xr);
	return status;
}
|
Chris@32
|
358
|
Chris@32
|
359
|
Chris@32
|
/*
 * Returns the lowest n bits of x in reversed order (bit 0 becomes bit n-1
 * and so on). Bits of x above the lowest n are ignored; n == 0 yields 0.
 */
static size_t reverse_bits(size_t x, unsigned int n) {
	size_t reversed = 0;
	while (n-- > 0) {
		// Move the current lowest bit of x onto the low end of the result
		reversed = (reversed << 1) | (x & 1);
		x >>= 1;
	}
	return reversed;
}
|
Chris@32
|
367
|
Chris@32
|
368
|
Chris@32
|
/*
 * Returns a newly allocated copy of the first n bytes at src, or NULL if
 * the allocation fails. The caller releases the copy with free().
 *
 * A zero-length request still allocates one byte, because malloc(0) may
 * return NULL and callers treat NULL as out-of-memory. Nothing is copied
 * in that case, so src is not read when n == 0.
 */
static void *memdup(const void *src, size_t n) {
	void *dest = malloc(n > 0 ? n : 1);
	if (dest != NULL && n > 0)
		memcpy(dest, src, n);
	return dest;
}
|