cannam@127
|
1 /*
|
cannam@127
|
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
|
cannam@127
|
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
cannam@127
|
4 *
|
cannam@127
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@127
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@127
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@127
|
8 * (at your option) any later version.
|
cannam@127
|
9 *
|
cannam@127
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@127
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@127
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@127
|
13 * GNU General Public License for more details.
|
cannam@127
|
14 *
|
cannam@127
|
15 * You should have received a copy of the GNU General Public License
|
cannam@127
|
16 * along with this program; if not, write to the Free Software
|
cannam@127
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@127
|
18 *
|
cannam@127
|
19 */
|
cannam@127
|
20
|
cannam@127
|
21
|
cannam@127
|
22 /* FFTW internal header file */
|
cannam@127
|
23 #ifndef __IFFTW_H__
|
cannam@127
|
24 #define __IFFTW_H__
|
cannam@127
|
25
|
cannam@127
|
26 #include "config.h"
|
cannam@127
|
27
|
cannam@127
|
28 #include <stdlib.h> /* size_t */
|
cannam@127
|
29 #include <stdarg.h> /* va_list */
|
cannam@127
|
30 #include <stddef.h> /* ptrdiff_t */
|
cannam@127
|
31 #include <limits.h> /* INT_MAX */
|
cannam@127
|
32
|
cannam@127
|
33 #if HAVE_SYS_TYPES_H
|
cannam@127
|
34 # include <sys/types.h>
|
cannam@127
|
35 #endif
|
cannam@127
|
36
|
cannam@127
|
37 #if HAVE_STDINT_H
|
cannam@127
|
38 # include <stdint.h> /* uintptr_t, maybe */
|
cannam@127
|
39 #endif
|
cannam@127
|
40
|
cannam@127
|
41 #if HAVE_INTTYPES_H
|
cannam@127
|
42 # include <inttypes.h> /* uintptr_t, maybe */
|
cannam@127
|
43 #endif
|
cannam@127
|
44
|
cannam@127
|
45 #ifdef __cplusplus
|
cannam@127
|
46 extern "C"
|
cannam@127
|
47 {
|
cannam@127
|
48 #endif /* __cplusplus */
|
cannam@127
|
49
|
cannam@127
|
50 /* Windows annoyances -- since tests/hook.c uses some internal
|
cannam@127
|
51 FFTW functions, we need to give them the dllexport attribute
|
cannam@127
|
52 under Windows when compiling as a DLL (see api/fftw3.h). */
|
cannam@127
|
53 #if defined(FFTW_EXTERN)
|
cannam@127
|
54 # define IFFTW_EXTERN FFTW_EXTERN
|
cannam@127
|
55 #elif (defined(FFTW_DLL) || defined(DLL_EXPORT)) \
|
cannam@127
|
56 && (defined(_WIN32) || defined(__WIN32__))
|
cannam@127
|
57 # define IFFTW_EXTERN extern __declspec(dllexport)
|
cannam@127
|
58 #else
|
cannam@127
|
59 # define IFFTW_EXTERN extern
|
cannam@127
|
60 #endif
|
cannam@127
|
61
|
cannam@127
|
62 /* determine precision and name-mangling scheme */
|
cannam@127
|
63 #define CONCAT(prefix, name) prefix ## name
|
cannam@127
|
64 #if defined(FFTW_SINGLE)
|
cannam@127
|
65 typedef float R;
|
cannam@127
|
66 # define X(name) CONCAT(fftwf_, name)
|
cannam@127
|
67 #elif defined(FFTW_LDOUBLE)
|
cannam@127
|
68 typedef long double R;
|
cannam@127
|
69 # define X(name) CONCAT(fftwl_, name)
|
cannam@127
|
70 # define TRIGREAL_IS_LONG_DOUBLE
|
cannam@127
|
71 #elif defined(FFTW_QUAD)
|
cannam@127
|
72 typedef __float128 R;
|
cannam@127
|
73 # define X(name) CONCAT(fftwq_, name)
|
cannam@127
|
74 # define TRIGREAL_IS_QUAD
|
cannam@127
|
75 #else
|
cannam@127
|
76 typedef double R;
|
cannam@127
|
77 # define X(name) CONCAT(fftw_, name)
|
cannam@127
|
78 #endif
|
cannam@127
|
79
|
cannam@127
|
80 /*
|
cannam@127
|
81 integral type large enough to contain a stride (what ``int'' should
|
cannam@127
|
82 have been in the first place).
|
cannam@127
|
83 */
|
cannam@127
|
84 typedef ptrdiff_t INT;
|
cannam@127
|
85
|
cannam@127
|
86 /* dummy use of unused parameters to silence compiler warnings */
|
cannam@127
|
87 #define UNUSED(x) (void)(x)
|
cannam@127
|
88
|
cannam@127
|
89 #define NELEM(array) ((sizeof(array) / sizeof((array)[0])))
|
cannam@127
|
90
|
cannam@127
|
91 #define FFT_SIGN (-1) /* sign convention for forward transforms */
|
cannam@127
|
92 extern void X(extract_reim)(int sign, R *c, R **r, R **i);
|
cannam@127
|
93
|
cannam@127
|
94 #define REGISTER_SOLVER(p, s) X(solver_register)(p, s)
|
cannam@127
|
95
|
cannam@127
|
96 #define STRINGIZEx(x) #x
|
cannam@127
|
97 #define STRINGIZE(x) STRINGIZEx(x)
|
cannam@127
|
98 #define CIMPLIES(ante, post) (!(ante) || (post))
|
cannam@127
|
99
|
cannam@127
|
100 /* define HAVE_SIMD if any simd extensions are supported */
|
cannam@127
|
101 #if defined(HAVE_SSE) || defined(HAVE_SSE2) || \
|
cannam@127
|
102 defined(HAVE_AVX) || defined(HAVE_AVX_128_FMA) || \
|
cannam@127
|
103 defined(HAVE_AVX2) || defined(HAVE_AVX512) || \
|
cannam@127
|
104 defined(HAVE_KCVI) || \
|
cannam@127
|
105 defined(HAVE_ALTIVEC) || defined(HAVE_VSX) || \
|
cannam@127
|
106 defined(HAVE_MIPS_PS) || \
|
cannam@127
|
107 defined(HAVE_GENERIC_SIMD128) || defined(HAVE_GENERIC_SIMD256)
|
cannam@127
|
108 #define HAVE_SIMD 1
|
cannam@127
|
109 #else
|
cannam@127
|
110 #define HAVE_SIMD 0
|
cannam@127
|
111 #endif
|
cannam@127
|
112
|
cannam@127
|
113 extern int X(have_simd_sse2)(void);
|
cannam@127
|
114 extern int X(have_simd_avx)(void);
|
cannam@127
|
115 extern int X(have_simd_avx_128_fma)(void);
|
cannam@127
|
116 extern int X(have_simd_avx2)(void);
|
cannam@127
|
117 extern int X(have_simd_avx2_128)(void);
|
cannam@127
|
118 extern int X(have_simd_avx512)(void);
|
cannam@127
|
119 extern int X(have_simd_altivec)(void);
|
cannam@127
|
120 extern int X(have_simd_vsx)(void);
|
cannam@127
|
121 extern int X(have_simd_neon)(void);
|
cannam@127
|
122
|
cannam@127
|
123 /* forward declarations */
|
cannam@127
|
124 typedef struct problem_s problem;
|
cannam@127
|
125 typedef struct plan_s plan;
|
cannam@127
|
126 typedef struct solver_s solver;
|
cannam@127
|
127 typedef struct planner_s planner;
|
cannam@127
|
128 typedef struct printer_s printer;
|
cannam@127
|
129 typedef struct scanner_s scanner;
|
cannam@127
|
130
|
cannam@127
|
131 /*-----------------------------------------------------------------------*/
|
cannam@127
|
132 /* alloca: */
|
cannam@127
|
133 #if HAVE_SIMD
|
cannam@127
|
134 # if defined(HAVE_KCVI) || defined(HAVE_AVX512)
|
cannam@127
|
135 # define MIN_ALIGNMENT 64
|
cannam@127
|
136 # elif defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_GENERIC_SIMD256)
|
cannam@127
|
137 # define MIN_ALIGNMENT 32 /* best alignment for AVX, conservative for
|
cannam@127
|
138 * everything else */
|
cannam@127
|
139 # else
|
cannam@127
|
140 /* Note that we cannot use 32-byte alignment for all SIMD. For
|
cannam@127
|
141 example, MacOS X malloc is 16-byte aligned, but there was no
|
cannam@127
|
142 posix_memalign in MacOS X until version 10.6. */
|
cannam@127
|
143 # define MIN_ALIGNMENT 16
|
cannam@127
|
144 # endif
|
cannam@127
|
145 #endif
|
cannam@127
|
146
|
cannam@127
|
147 #if defined(HAVE_ALLOCA) && defined(FFTW_ENABLE_ALLOCA)
|
cannam@127
|
148 /* use alloca if available */
|
cannam@127
|
149
|
cannam@127
|
150 #ifndef alloca
|
cannam@127
|
151 #ifdef __GNUC__
|
cannam@127
|
152 # define alloca __builtin_alloca
|
cannam@127
|
153 #else
|
cannam@127
|
154 # ifdef _MSC_VER
|
cannam@127
|
155 # include <malloc.h>
|
cannam@127
|
156 # define alloca _alloca
|
cannam@127
|
157 # else
|
cannam@127
|
158 # if HAVE_ALLOCA_H
|
cannam@127
|
159 # include <alloca.h>
|
cannam@127
|
160 # else
|
cannam@127
|
161 # ifdef _AIX
|
cannam@127
|
162 #pragma alloca
|
cannam@127
|
163 # else
|
cannam@127
|
164 # ifndef alloca /* predefined by HP cc +Olibcalls */
|
cannam@127
|
165 void *alloca(size_t);
|
cannam@127
|
166 # endif
|
cannam@127
|
167 # endif
|
cannam@127
|
168 # endif
|
cannam@127
|
169 # endif
|
cannam@127
|
170 #endif
|
cannam@127
|
171 #endif
|
cannam@127
|
172
|
cannam@127
|
173 # ifdef MIN_ALIGNMENT
|
cannam@127
|
174 # define STACK_MALLOC(T, p, n) \
|
cannam@127
|
175 { \
|
cannam@127
|
176 p = (T)alloca((n) + MIN_ALIGNMENT); \
|
cannam@127
|
177 p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) & \
|
cannam@127
|
178 (~(uintptr_t)(MIN_ALIGNMENT - 1))); \
|
cannam@127
|
179 }
|
cannam@127
|
180 # define STACK_FREE(n)
|
cannam@127
|
181 # else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */
|
cannam@127
|
182 # define STACK_MALLOC(T, p, n) p = (T)alloca(n)
|
cannam@127
|
183 # define STACK_FREE(n)
|
cannam@127
|
184 # endif
|
cannam@127
|
185
|
cannam@127
|
186 #else /* ! HAVE_ALLOCA */
|
cannam@127
|
187 /* use malloc instead of alloca */
|
cannam@127
|
188 # define STACK_MALLOC(T, p, n) p = (T)MALLOC(n, OTHER)
|
cannam@127
|
189 # define STACK_FREE(n) X(ifree)(n)
|
cannam@127
|
190 #endif /* ! HAVE_ALLOCA */
|
cannam@127
|
191
|
cannam@127
|
192 /* allocation of buffers. If these grow too large use malloc(), else
|
cannam@127
|
193 use STACK_MALLOC (hopefully reducing to alloca()). */
|
cannam@127
|
194
|
cannam@127
|
195 /* 64KiB ought to be enough for anybody */
|
cannam@127
|
196 #define MAX_STACK_ALLOC ((size_t)64 * 1024)
|
cannam@127
|
197
|
cannam@127
|
198 #define BUF_ALLOC(T, p, n) \
|
cannam@127
|
199 { \
|
cannam@127
|
200 if ((n) < MAX_STACK_ALLOC) { \
|
cannam@127
|
201 STACK_MALLOC(T, p, n); \
|
cannam@127
|
202 } else { \
|
cannam@127
|
203 p = (T)MALLOC(n, BUFFERS); \
|
cannam@127
|
204 } \
|
cannam@127
|
205 }
|
cannam@127
|
206
|
cannam@127
|
207 #define BUF_FREE(p, n) \
|
cannam@127
|
208 { \
|
cannam@127
|
209 if ((n) < MAX_STACK_ALLOC) { \
|
cannam@127
|
210 STACK_FREE(p); \
|
cannam@127
|
211 } else { \
|
cannam@127
|
212 X(ifree)(p); \
|
cannam@127
|
213 } \
|
cannam@127
|
214 }
|
cannam@127
|
215
|
cannam@127
|
216 /*-----------------------------------------------------------------------*/
|
cannam@127
|
217 /* define uintptr_t if it is not already defined */
|
cannam@127
|
218
|
cannam@127
|
219 #ifndef HAVE_UINTPTR_T
|
cannam@127
|
220 # if SIZEOF_VOID_P == 0
|
cannam@127
|
221 # error sizeof void* is unknown!
|
cannam@127
|
222 # elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P
|
cannam@127
|
223 typedef unsigned int uintptr_t;
|
cannam@127
|
224 # elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P
|
cannam@127
|
225 typedef unsigned long uintptr_t;
|
cannam@127
|
226 # elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P
|
cannam@127
|
227 typedef unsigned long long uintptr_t;
|
cannam@127
|
228 # else
|
cannam@127
|
229 # error no unsigned integer type matches void* sizeof!
|
cannam@127
|
230 # endif
|
cannam@127
|
231 #endif
|
cannam@127
|
232
|
cannam@127
|
233 /*-----------------------------------------------------------------------*/
|
cannam@127
|
234 /* We can do an optimization for copying pairs of (aligned) floats
|
cannam@127
|
235 when in single precision if 2*float = double. */
|
cannam@127
|
236
|
cannam@127
|
237 #define FFTW_2R_IS_DOUBLE (defined(FFTW_SINGLE) \
|
cannam@127
|
238 && SIZEOF_FLOAT != 0 \
|
cannam@127
|
239 && SIZEOF_DOUBLE == 2*SIZEOF_FLOAT)
|
cannam@127
|
240
|
cannam@127
|
241 #define DOUBLE_ALIGNED(p) ((((uintptr_t)(p)) % sizeof(double)) == 0)
|
cannam@127
|
242
|
cannam@127
|
243 /*-----------------------------------------------------------------------*/
|
cannam@127
|
244 /* assert.c: */
|
cannam@127
|
245 IFFTW_EXTERN void X(assertion_failed)(const char *s,
|
cannam@127
|
246 int line, const char *file);
|
cannam@127
|
247
|
cannam@127
|
248 /* always check */
|
cannam@127
|
249 #define CK(ex) \
|
cannam@127
|
250 (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0))
|
cannam@127
|
251
|
cannam@127
|
252 #ifdef FFTW_DEBUG
|
cannam@127
|
253 /* check only if debug enabled */
|
cannam@127
|
254 #define A(ex) \
|
cannam@127
|
255 (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0))
|
cannam@127
|
256 #else
|
cannam@127
|
257 #define A(ex) /* nothing */
|
cannam@127
|
258 #endif
|
cannam@127
|
259
|
cannam@127
|
260 extern void X(debug)(const char *format, ...);
|
cannam@127
|
261 #define D X(debug)
|
cannam@127
|
262
|
cannam@127
|
263 /*-----------------------------------------------------------------------*/
|
cannam@127
|
264 /* kalloc.c: */
|
cannam@127
|
265 extern void *X(kernel_malloc)(size_t n);
|
cannam@127
|
266 extern void X(kernel_free)(void *p);
|
cannam@127
|
267
|
cannam@127
|
268 /*-----------------------------------------------------------------------*/
|
cannam@127
|
269 /* alloc.c: */
|
cannam@127
|
270
|
cannam@127
|
271 /* objects allocated by malloc, for statistical purposes */
|
cannam@127
|
272 enum malloc_tag {
|
cannam@127
|
273 EVERYTHING,
|
cannam@127
|
274 PLANS,
|
cannam@127
|
275 SOLVERS,
|
cannam@127
|
276 PROBLEMS,
|
cannam@127
|
277 BUFFERS,
|
cannam@127
|
278 HASHT,
|
cannam@127
|
279 TENSORS,
|
cannam@127
|
280 PLANNERS,
|
cannam@127
|
281 SLVDESCS,
|
cannam@127
|
282 TWIDDLES,
|
cannam@127
|
283 STRIDES,
|
cannam@127
|
284 OTHER,
|
cannam@127
|
285 MALLOC_WHAT_LAST /* must be last */
|
cannam@127
|
286 };
|
cannam@127
|
287
|
cannam@127
|
288 IFFTW_EXTERN void X(ifree)(void *ptr);
|
cannam@127
|
289 extern void X(ifree0)(void *ptr);
|
cannam@127
|
290
|
cannam@127
|
291 #ifdef FFTW_DEBUG_MALLOC
|
cannam@127
|
292
|
cannam@127
|
293 IFFTW_EXTERN void *X(malloc_debug)(size_t n, enum malloc_tag what,
|
cannam@127
|
294 const char *file, int line);
|
cannam@127
|
295 #define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__)
|
cannam@127
|
296 IFFTW_EXTERN void X(malloc_print_minfo)(int vrbose);
|
cannam@127
|
297
|
cannam@127
|
298 #else /* ! FFTW_DEBUG_MALLOC */
|
cannam@127
|
299
|
cannam@127
|
300 IFFTW_EXTERN void *X(malloc_plain)(size_t sz);
|
cannam@127
|
301 #define MALLOC(n, what) X(malloc_plain)(n)
|
cannam@127
|
302
|
cannam@127
|
303 #endif
|
cannam@127
|
304
|
cannam@127
|
305 #if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && (defined(HAVE_THREADS) || defined(HAVE_OPENMP))
|
cannam@127
|
306 extern int X(in_thread);
|
cannam@127
|
307 # define IN_THREAD X(in_thread)
|
cannam@127
|
308 # define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1
|
cannam@127
|
309 # define THREAD_OFF X(in_thread) = in_thread_save; }
|
cannam@127
|
310 #else
|
cannam@127
|
311 # define IN_THREAD 0
|
cannam@127
|
312 # define THREAD_ON
|
cannam@127
|
313 # define THREAD_OFF
|
cannam@127
|
314 #endif
|
cannam@127
|
315
|
cannam@127
|
316 /*-----------------------------------------------------------------------*/
|
cannam@127
|
317 /* low-resolution clock */
|
cannam@127
|
318
|
cannam@127
|
319 #ifdef FAKE_CRUDE_TIME
|
cannam@127
|
320 typedef int crude_time;
|
cannam@127
|
321 #else
|
cannam@127
|
322 # if TIME_WITH_SYS_TIME
|
cannam@127
|
323 # include <sys/time.h>
|
cannam@127
|
324 # include <time.h>
|
cannam@127
|
325 # else
|
cannam@127
|
326 # if HAVE_SYS_TIME_H
|
cannam@127
|
327 # include <sys/time.h>
|
cannam@127
|
328 # else
|
cannam@127
|
329 # include <time.h>
|
cannam@127
|
330 # endif
|
cannam@127
|
331 # endif
|
cannam@127
|
332
|
cannam@127
|
333 # ifdef HAVE_BSDGETTIMEOFDAY
|
cannam@127
|
334 # ifndef HAVE_GETTIMEOFDAY
|
cannam@127
|
335 # define gettimeofday BSDgettimeofday
|
cannam@127
|
336 # define HAVE_GETTIMEOFDAY 1
|
cannam@127
|
337 # endif
|
cannam@127
|
338 # endif
|
cannam@127
|
339
|
cannam@127
|
340 # if defined(HAVE_GETTIMEOFDAY)
|
cannam@127
|
341 typedef struct timeval crude_time;
|
cannam@127
|
342 # else
|
cannam@127
|
343 typedef clock_t crude_time;
|
cannam@127
|
344 # endif
|
cannam@127
|
345 #endif /* else FAKE_CRUDE_TIME */
|
cannam@127
|
346
|
cannam@127
|
347 crude_time X(get_crude_time)(void);
|
cannam@127
|
348 double X(elapsed_since)(const planner *plnr, const problem *p,
|
cannam@127
|
349 crude_time t0); /* time in seconds since t0 */
|
cannam@127
|
350
|
cannam@127
|
351 /*-----------------------------------------------------------------------*/
|
cannam@127
|
352 /* ops.c: */
|
cannam@127
|
353 /*
|
cannam@127
|
354 * ops counter. The total number of additions is add + fma
|
cannam@127
|
355 * and the total number of multiplications is mul + fma.
|
cannam@127
|
356 * Total flops = add + mul + 2 * fma
|
cannam@127
|
357 */
|
cannam@127
|
358 typedef struct {
|
cannam@127
|
359 double add;
|
cannam@127
|
360 double mul;
|
cannam@127
|
361 double fma;
|
cannam@127
|
362 double other;
|
cannam@127
|
363 } opcnt;
|
cannam@127
|
364
|
cannam@127
|
365 void X(ops_zero)(opcnt *dst);
|
cannam@127
|
366 void X(ops_other)(INT o, opcnt *dst);
|
cannam@127
|
367 void X(ops_cpy)(const opcnt *src, opcnt *dst);
|
cannam@127
|
368
|
cannam@127
|
369 void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst);
|
cannam@127
|
370 void X(ops_add2)(const opcnt *a, opcnt *dst);
|
cannam@127
|
371
|
cannam@127
|
372 /* dst = m * a + b */
|
cannam@127
|
373 void X(ops_madd)(INT m, const opcnt *a, const opcnt *b, opcnt *dst);
|
cannam@127
|
374
|
cannam@127
|
375 /* dst += m * a */
|
cannam@127
|
376 void X(ops_madd2)(INT m, const opcnt *a, opcnt *dst);
|
cannam@127
|
377
|
cannam@127
|
378
|
cannam@127
|
379 /*-----------------------------------------------------------------------*/
|
cannam@127
|
380 /* minmax.c: */
|
cannam@127
|
381 INT X(imax)(INT a, INT b);
|
cannam@127
|
382 INT X(imin)(INT a, INT b);
|
cannam@127
|
383
|
cannam@127
|
384 /*-----------------------------------------------------------------------*/
|
cannam@127
|
385 /* iabs.c: */
|
cannam@127
|
386 INT X(iabs)(INT a);
|
cannam@127
|
387
|
cannam@127
|
388 /* inline version */
|
cannam@127
|
389 #define IABS(x) (((x) < 0) ? (0 - (x)) : (x))
|
cannam@127
|
390
|
cannam@127
|
391 /*-----------------------------------------------------------------------*/
|
cannam@127
|
392 /* md5.c */
|
cannam@127
|
393
|
cannam@127
|
394 #if SIZEOF_UNSIGNED_INT >= 4
|
cannam@127
|
395 typedef unsigned int md5uint;
|
cannam@127
|
396 #else
|
cannam@127
|
397 typedef unsigned long md5uint; /* at least 32 bits as per C standard */
|
cannam@127
|
398 #endif
|
cannam@127
|
399
|
cannam@127
|
400 typedef md5uint md5sig[4];
|
cannam@127
|
401
|
cannam@127
|
402 typedef struct {
|
cannam@127
|
403 md5sig s; /* state and signature */
|
cannam@127
|
404
|
cannam@127
|
405 /* fields not meant to be used outside md5.c: */
|
cannam@127
|
406 unsigned char c[64]; /* stuff not yet processed */
|
cannam@127
|
407 unsigned l; /* total length. Should be 64 bits long, but this is
|
cannam@127
|
408 good enough for us */
|
cannam@127
|
409 } md5;
|
cannam@127
|
410
|
cannam@127
|
411 void X(md5begin)(md5 *p);
|
cannam@127
|
412 void X(md5putb)(md5 *p, const void *d_, size_t len);
|
cannam@127
|
413 void X(md5puts)(md5 *p, const char *s);
|
cannam@127
|
414 void X(md5putc)(md5 *p, unsigned char c);
|
cannam@127
|
415 void X(md5int)(md5 *p, int i);
|
cannam@127
|
416 void X(md5INT)(md5 *p, INT i);
|
cannam@127
|
417 void X(md5unsigned)(md5 *p, unsigned i);
|
cannam@127
|
418 void X(md5end)(md5 *p);
|
cannam@127
|
419
|
cannam@127
|
420 /*-----------------------------------------------------------------------*/
|
cannam@127
|
421 /* tensor.c: */
|
cannam@127
|
422 #define STRUCT_HACK_KR
|
cannam@127
|
423 #undef STRUCT_HACK_C99
|
cannam@127
|
424
|
cannam@127
|
425 typedef struct {
|
cannam@127
|
426 INT n;
|
cannam@127
|
427 INT is; /* input stride */
|
cannam@127
|
428 INT os; /* output stride */
|
cannam@127
|
429 } iodim;
|
cannam@127
|
430
|
cannam@127
|
431 typedef struct {
|
cannam@127
|
432 int rnk;
|
cannam@127
|
433 #if defined(STRUCT_HACK_KR)
|
cannam@127
|
434 iodim dims[1];
|
cannam@127
|
435 #elif defined(STRUCT_HACK_C99)
|
cannam@127
|
436 iodim dims[];
|
cannam@127
|
437 #else
|
cannam@127
|
438 iodim *dims;
|
cannam@127
|
439 #endif
|
cannam@127
|
440 } tensor;
|
cannam@127
|
441
|
cannam@127
|
442 /*
|
cannam@127
|
443 Definition of rank -infinity.
|
cannam@127
|
444 This definition has the property that if you want rank 0 or 1,
|
cannam@127
|
445 you can simply test for rank <= 1. This is a common case.
|
cannam@127
|
446
|
cannam@127
|
447 A tensor of rank -infinity has size 0.
|
cannam@127
|
448 */
|
cannam@127
|
449 #define RNK_MINFTY INT_MAX
|
cannam@127
|
450 #define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY)
|
cannam@127
|
451
|
cannam@127
|
452 typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind;
|
cannam@127
|
453
|
cannam@127
|
454 tensor *X(mktensor)(int rnk);
|
cannam@127
|
455 tensor *X(mktensor_0d)(void);
|
cannam@127
|
456 tensor *X(mktensor_1d)(INT n, INT is, INT os);
|
cannam@127
|
457 tensor *X(mktensor_2d)(INT n0, INT is0, INT os0,
|
cannam@127
|
458 INT n1, INT is1, INT os1);
|
cannam@127
|
459 tensor *X(mktensor_3d)(INT n0, INT is0, INT os0,
|
cannam@127
|
460 INT n1, INT is1, INT os1,
|
cannam@127
|
461 INT n2, INT is2, INT os2);
|
cannam@127
|
462 tensor *X(mktensor_4d)(INT n0, INT is0, INT os0,
|
cannam@127
|
463 INT n1, INT is1, INT os1,
|
cannam@127
|
464 INT n2, INT is2, INT os2,
|
cannam@127
|
465 INT n3, INT is3, INT os3);
|
cannam@127
|
466 tensor *X(mktensor_5d)(INT n0, INT is0, INT os0,
|
cannam@127
|
467 INT n1, INT is1, INT os1,
|
cannam@127
|
468 INT n2, INT is2, INT os2,
|
cannam@127
|
469 INT n3, INT is3, INT os3,
|
cannam@127
|
470 INT n4, INT is4, INT os4);
|
cannam@127
|
471 INT X(tensor_sz)(const tensor *sz);
|
cannam@127
|
472 void X(tensor_md5)(md5 *p, const tensor *t);
|
cannam@127
|
473 INT X(tensor_max_index)(const tensor *sz);
|
cannam@127
|
474 INT X(tensor_min_istride)(const tensor *sz);
|
cannam@127
|
475 INT X(tensor_min_ostride)(const tensor *sz);
|
cannam@127
|
476 INT X(tensor_min_stride)(const tensor *sz);
|
cannam@127
|
477 int X(tensor_inplace_strides)(const tensor *sz);
|
cannam@127
|
478 int X(tensor_inplace_strides2)(const tensor *a, const tensor *b);
|
cannam@127
|
479 int X(tensor_strides_decrease)(const tensor *sz, const tensor *vecsz,
|
cannam@127
|
480 inplace_kind k);
|
cannam@127
|
481 tensor *X(tensor_copy)(const tensor *sz);
|
cannam@127
|
482 int X(tensor_kosherp)(const tensor *x);
|
cannam@127
|
483
|
cannam@127
|
484 tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k);
|
cannam@127
|
485 tensor *X(tensor_copy_except)(const tensor *sz, int except_dim);
|
cannam@127
|
486 tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk);
|
cannam@127
|
487 tensor *X(tensor_compress)(const tensor *sz);
|
cannam@127
|
488 tensor *X(tensor_compress_contiguous)(const tensor *sz);
|
cannam@127
|
489 tensor *X(tensor_append)(const tensor *a, const tensor *b);
|
cannam@127
|
490 void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b);
|
cannam@127
|
491 int X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os);
|
cannam@127
|
492 void X(tensor_destroy)(tensor *sz);
|
cannam@127
|
493 void X(tensor_destroy2)(tensor *a, tensor *b);
|
cannam@127
|
494 void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d);
|
cannam@127
|
495 void X(tensor_print)(const tensor *sz, printer *p);
|
cannam@127
|
496 int X(dimcmp)(const iodim *a, const iodim *b);
|
cannam@127
|
497 int X(tensor_equal)(const tensor *a, const tensor *b);
|
cannam@127
|
498 int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz);
|
cannam@127
|
499
|
cannam@127
|
500 /*-----------------------------------------------------------------------*/
|
cannam@127
|
501 /* problem.c: */
|
cannam@127
|
502 enum {
|
cannam@127
|
503 /* a problem that cannot be solved */
|
cannam@127
|
504 PROBLEM_UNSOLVABLE,
|
cannam@127
|
505
|
cannam@127
|
506 PROBLEM_DFT,
|
cannam@127
|
507 PROBLEM_RDFT,
|
cannam@127
|
508 PROBLEM_RDFT2,
|
cannam@127
|
509
|
cannam@127
|
510 /* for mpi/ subdirectory */
|
cannam@127
|
511 PROBLEM_MPI_DFT,
|
cannam@127
|
512 PROBLEM_MPI_RDFT,
|
cannam@127
|
513 PROBLEM_MPI_RDFT2,
|
cannam@127
|
514 PROBLEM_MPI_TRANSPOSE,
|
cannam@127
|
515
|
cannam@127
|
516 PROBLEM_LAST
|
cannam@127
|
517 };
|
cannam@127
|
518
|
cannam@127
|
519 typedef struct {
|
cannam@127
|
520 int problem_kind;
|
cannam@127
|
521 void (*hash) (const problem *ego, md5 *p);
|
cannam@127
|
522 void (*zero) (const problem *ego);
|
cannam@127
|
523 void (*print) (const problem *ego, printer *p);
|
cannam@127
|
524 void (*destroy) (problem *ego);
|
cannam@127
|
525 } problem_adt;
|
cannam@127
|
526
|
cannam@127
|
527 struct problem_s {
|
cannam@127
|
528 const problem_adt *adt;
|
cannam@127
|
529 };
|
cannam@127
|
530
|
cannam@127
|
531 problem *X(mkproblem)(size_t sz, const problem_adt *adt);
|
cannam@127
|
532 void X(problem_destroy)(problem *ego);
|
cannam@127
|
533 problem *X(mkproblem_unsolvable)(void);
|
cannam@127
|
534
|
cannam@127
|
535 /*-----------------------------------------------------------------------*/
|
cannam@127
|
536 /* print.c */
|
cannam@127
|
537 struct printer_s {
|
cannam@127
|
538 void (*print)(printer *p, const char *format, ...);
|
cannam@127
|
539 void (*vprint)(printer *p, const char *format, va_list ap);
|
cannam@127
|
540 void (*putchr)(printer *p, char c);
|
cannam@127
|
541 void (*cleanup)(printer *p);
|
cannam@127
|
542 int indent;
|
cannam@127
|
543 int indent_incr;
|
cannam@127
|
544 };
|
cannam@127
|
545
|
cannam@127
|
546 printer *X(mkprinter)(size_t size,
|
cannam@127
|
547 void (*putchr)(printer *p, char c),
|
cannam@127
|
548 void (*cleanup)(printer *p));
|
cannam@127
|
549 IFFTW_EXTERN void X(printer_destroy)(printer *p);
|
cannam@127
|
550
|
cannam@127
|
551 /*-----------------------------------------------------------------------*/
|
cannam@127
|
552 /* scan.c */
|
cannam@127
|
553 struct scanner_s {
|
cannam@127
|
554 int (*scan)(scanner *sc, const char *format, ...);
|
cannam@127
|
555 int (*vscan)(scanner *sc, const char *format, va_list ap);
|
cannam@127
|
556 int (*getchr)(scanner *sc);
|
cannam@127
|
557 int ungotc;
|
cannam@127
|
558 };
|
cannam@127
|
559
|
cannam@127
|
560 scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc));
|
cannam@127
|
561 void X(scanner_destroy)(scanner *sc);
|
cannam@127
|
562
|
cannam@127
|
563 /*-----------------------------------------------------------------------*/
|
cannam@127
|
564 /* plan.c: */
|
cannam@127
|
565
|
cannam@127
|
566 enum wakefulness {
|
cannam@127
|
567 SLEEPY,
|
cannam@127
|
568 AWAKE_ZERO,
|
cannam@127
|
569 AWAKE_SQRTN_TABLE,
|
cannam@127
|
570 AWAKE_SINCOS
|
cannam@127
|
571 };
|
cannam@127
|
572
|
cannam@127
|
573 typedef struct {
|
cannam@127
|
574 void (*solve)(const plan *ego, const problem *p);
|
cannam@127
|
575 void (*awake)(plan *ego, enum wakefulness wakefulness);
|
cannam@127
|
576 void (*print)(const plan *ego, printer *p);
|
cannam@127
|
577 void (*destroy)(plan *ego);
|
cannam@127
|
578 } plan_adt;
|
cannam@127
|
579
|
cannam@127
|
580 struct plan_s {
|
cannam@127
|
581 const plan_adt *adt;
|
cannam@127
|
582 opcnt ops;
|
cannam@127
|
583 double pcost;
|
cannam@127
|
584 enum wakefulness wakefulness; /* used for debugging only */
|
cannam@127
|
585 int could_prune_now_p;
|
cannam@127
|
586 };
|
cannam@127
|
587
|
cannam@127
|
588 plan *X(mkplan)(size_t size, const plan_adt *adt);
|
cannam@127
|
589 void X(plan_destroy_internal)(plan *ego);
|
cannam@127
|
590 IFFTW_EXTERN void X(plan_awake)(plan *ego, enum wakefulness wakefulness);
|
cannam@127
|
591 void X(plan_null_destroy)(plan *ego);
|
cannam@127
|
592
|
cannam@127
|
593 /*-----------------------------------------------------------------------*/
|
cannam@127
|
594 /* solver.c: */
|
cannam@127
|
595 typedef struct {
|
cannam@127
|
596 int problem_kind;
|
cannam@127
|
597 plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr);
|
cannam@127
|
598 void (*destroy)(solver *ego);
|
cannam@127
|
599 } solver_adt;
|
cannam@127
|
600
|
cannam@127
|
601 struct solver_s {
|
cannam@127
|
602 const solver_adt *adt;
|
cannam@127
|
603 int refcnt;
|
cannam@127
|
604 };
|
cannam@127
|
605
|
cannam@127
|
606 solver *X(mksolver)(size_t size, const solver_adt *adt);
|
cannam@127
|
607 void X(solver_use)(solver *ego);
|
cannam@127
|
608 void X(solver_destroy)(solver *ego);
|
cannam@127
|
609 void X(solver_register)(planner *plnr, solver *s);
|
cannam@127
|
610
|
cannam@127
|
611 /* shorthand */
|
cannam@127
|
612 #define MKSOLVER(type, adt) ((type *)X(mksolver)(sizeof(type), adt))
|
cannam@127
|
613
|
cannam@127
|
614 /*-----------------------------------------------------------------------*/
|
cannam@127
|
615 /* planner.c */
|
cannam@127
|
616
|
cannam@127
|
617 typedef struct slvdesc_s {
|
cannam@127
|
618 solver *slv;
|
cannam@127
|
619 const char *reg_nam;
|
cannam@127
|
620 unsigned nam_hash;
|
cannam@127
|
621 int reg_id;
|
cannam@127
|
622 int next_for_same_problem_kind;
|
cannam@127
|
623 } slvdesc;
|
cannam@127
|
624
|
cannam@127
|
625 typedef struct solution_s solution; /* opaque */
|
cannam@127
|
626
|
cannam@127
|
627 /* interpretation of L and U:
|
cannam@127
|
628
|
cannam@127
|
629 - if it returns a plan, the planner guarantees that all applicable
|
cannam@127
|
630 plans at least as impatient as U have been tried, and that each
|
cannam@127
|
631 plan in the solution is at least as impatient as L.
|
cannam@127
|
632
|
cannam@127
|
633 - if it returns 0, the planner guarantees to have tried all solvers
|
cannam@127
|
634 at least as impatient as L, and that none of them was applicable.
|
cannam@127
|
635
|
cannam@127
|
636 The structure is packed to fit into 64 bits.
|
cannam@127
|
637 */
|
cannam@127
|
638
|
cannam@127
|
639 typedef struct {
|
cannam@127
|
640 unsigned l:20;
|
cannam@127
|
641 unsigned hash_info:3;
|
cannam@127
|
642 # define BITS_FOR_TIMELIMIT 9
|
cannam@127
|
643 unsigned timelimit_impatience:BITS_FOR_TIMELIMIT;
|
cannam@127
|
644 unsigned u:20;
|
cannam@127
|
645
|
cannam@127
|
646 /* abstraction break: we store the solver here to pad the
|
cannam@127
|
647 structure to 64 bits. Otherwise, the struct is padded to 64
|
cannam@127
|
648 bits anyway, and another word is allocated for slvndx. */
|
cannam@127
|
649 # define BITS_FOR_SLVNDX 12
|
cannam@127
|
650 unsigned slvndx:BITS_FOR_SLVNDX;
|
cannam@127
|
651 } flags_t;
|
cannam@127
|
652
|
cannam@127
|
653 /* impatience flags */
|
cannam@127
|
654 enum {
|
cannam@127
|
655 BELIEVE_PCOST = 0x0001,
|
cannam@127
|
656 ESTIMATE = 0x0002,
|
cannam@127
|
657 NO_DFT_R2HC = 0x0004,
|
cannam@127
|
658 NO_SLOW = 0x0008,
|
cannam@127
|
659 NO_VRECURSE = 0x0010,
|
cannam@127
|
660 NO_INDIRECT_OP = 0x0020,
|
cannam@127
|
661 NO_LARGE_GENERIC = 0x0040,
|
cannam@127
|
662 NO_RANK_SPLITS = 0x0080,
|
cannam@127
|
663 NO_VRANK_SPLITS = 0x0100,
|
cannam@127
|
664 NO_NONTHREADED = 0x0200,
|
cannam@127
|
665 NO_BUFFERING = 0x0400,
|
cannam@127
|
666 NO_FIXED_RADIX_LARGE_N = 0x0800,
|
cannam@127
|
667 NO_DESTROY_INPUT = 0x1000,
|
cannam@127
|
668 NO_SIMD = 0x2000,
|
cannam@127
|
669 CONSERVE_MEMORY = 0x4000,
|
cannam@127
|
670 NO_DHT_R2HC = 0x8000,
|
cannam@127
|
671 NO_UGLY = 0x10000,
|
cannam@127
|
672 ALLOW_PRUNING = 0x20000
|
cannam@127
|
673 };
|
cannam@127
|
674
|
cannam@127
|
675 /* hashtable information */
|
cannam@127
|
676 enum {
|
cannam@127
|
677 BLESSING = 0x1u, /* save this entry */
|
cannam@127
|
678 H_VALID = 0x2u, /* valid hashtable entry */
|
cannam@127
|
679 H_LIVE = 0x4u /* entry is nonempty, implies H_VALID */
|
cannam@127
|
680 };
|
cannam@127
|
681
|
cannam@127
|
/* Accessors for the fields of the planner's `flags' member.  `plnr' is a
   pointer to a planner. */
#define PLNR_L(plnr) ((plnr)->flags.l)
#define PLNR_U(plnr) ((plnr)->flags.u)
#define PLNR_TIMELIMIT_IMPATIENCE(plnr) ((plnr)->flags.timelimit_impatience)

/* Predicates testing one bit of flags.u.  Each yields the masked bit
   (nonzero when set), not a normalized 0/1. */
#define ESTIMATEP(plnr) (PLNR_U(plnr) & ESTIMATE)
#define BELIEVE_PCOSTP(plnr) (PLNR_U(plnr) & BELIEVE_PCOST)
#define ALLOW_PRUNINGP(plnr) (PLNR_U(plnr) & ALLOW_PRUNING)

/* Predicates testing one bit of flags.l. */
#define NO_INDIRECT_OP_P(plnr) (PLNR_L(plnr) & NO_INDIRECT_OP)
#define NO_LARGE_GENERICP(plnr) (PLNR_L(plnr) & NO_LARGE_GENERIC)
#define NO_RANK_SPLITSP(plnr) (PLNR_L(plnr) & NO_RANK_SPLITS)
#define NO_VRANK_SPLITSP(plnr) (PLNR_L(plnr) & NO_VRANK_SPLITS)
#define NO_VRECURSEP(plnr) (PLNR_L(plnr) & NO_VRECURSE)
#define NO_DFT_R2HCP(plnr) (PLNR_L(plnr) & NO_DFT_R2HC)
#define NO_SLOWP(plnr) (PLNR_L(plnr) & NO_SLOW)
#define NO_UGLYP(plnr) (PLNR_L(plnr) & NO_UGLY)
#define NO_FIXED_RADIX_LARGE_NP(plnr) \
  (PLNR_L(plnr) & NO_FIXED_RADIX_LARGE_N)
/* NO_NONTHREADED only takes effect when the planner has more than one
   thread (nthr > 1). */
#define NO_NONTHREADEDP(plnr) \
  ((PLNR_L(plnr) & NO_NONTHREADED) && (plnr)->nthr > 1)

#define NO_DESTROY_INPUTP(plnr) (PLNR_L(plnr) & NO_DESTROY_INPUT)
#define NO_SIMDP(plnr) (PLNR_L(plnr) & NO_SIMD)
#define CONSERVE_MEMORYP(plnr) (PLNR_L(plnr) & CONSERVE_MEMORY)
#define NO_DHT_R2HCP(plnr) (PLNR_L(plnr) & NO_DHT_R2HC)
#define NO_BUFFERINGP(plnr) (PLNR_L(plnr) & NO_BUFFERING)

/* argument type of the planner's `forget' operation */
typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia;

typedef enum {
     /* WISDOM_NORMAL: planner may or may not use wisdom */
     WISDOM_NORMAL,

     /* WISDOM_ONLY: planner must use wisdom and must avoid searching */
     WISDOM_ONLY,

     /* WISDOM_IS_BOGUS: planner must return 0 as quickly as possible */
     WISDOM_IS_BOGUS,

     /* WISDOM_IGNORE_INFEASIBLE: planner ignores infeasible wisdom */
     WISDOM_IGNORE_INFEASIBLE,

     /* WISDOM_IGNORE_ALL: planner ignores all */
     WISDOM_IGNORE_ALL
} wisdom_state_t;

728 typedef struct {
|
cannam@127
|
729 void (*register_solver)(planner *ego, solver *s);
|
cannam@127
|
730 plan *(*mkplan)(planner *ego, const problem *p);
|
cannam@127
|
731 void (*forget)(planner *ego, amnesia a);
|
cannam@127
|
732 void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved
|
cannam@127
|
733 word in C++. */
|
cannam@127
|
734 int (*imprt)(planner *ego, scanner *sc);
|
cannam@127
|
735 } planner_adt;
|
cannam@127
|
736
|
cannam@127
|
737 /* hash table of solutions */
|
cannam@127
|
738 typedef struct {
|
cannam@127
|
739 solution *solutions;
|
cannam@127
|
740 unsigned hashsiz, nelem;
|
cannam@127
|
741
|
cannam@127
|
742 /* statistics */
|
cannam@127
|
743 int lookup, succ_lookup, lookup_iter;
|
cannam@127
|
744 int insert, insert_iter, insert_unknown;
|
cannam@127
|
745 int nrehash;
|
cannam@127
|
746 } hashtab;
|
cannam@127
|
747
|
cannam@127
|
748 typedef enum { COST_SUM, COST_MAX } cost_kind;
|
cannam@127
|
749
|
cannam@127
|
750 struct planner_s {
|
cannam@127
|
751 const planner_adt *adt;
|
cannam@127
|
752 void (*hook)(struct planner_s *plnr, plan *pln,
|
cannam@127
|
753 const problem *p, int optimalp);
|
cannam@127
|
754 double (*cost_hook)(const problem *p, double t, cost_kind k);
|
cannam@127
|
755 int (*wisdom_ok_hook)(const problem *p, flags_t flags);
|
cannam@127
|
756 void (*nowisdom_hook)(const problem *p);
|
cannam@127
|
757 wisdom_state_t (*bogosity_hook)(wisdom_state_t state, const problem *p);
|
cannam@127
|
758
|
cannam@127
|
759 /* solver descriptors */
|
cannam@127
|
760 slvdesc *slvdescs;
|
cannam@127
|
761 unsigned nslvdesc, slvdescsiz;
|
cannam@127
|
762 const char *cur_reg_nam;
|
cannam@127
|
763 int cur_reg_id;
|
cannam@127
|
764 int slvdescs_for_problem_kind[PROBLEM_LAST];
|
cannam@127
|
765
|
cannam@127
|
766 wisdom_state_t wisdom_state;
|
cannam@127
|
767
|
cannam@127
|
768 hashtab htab_blessed;
|
cannam@127
|
769 hashtab htab_unblessed;
|
cannam@127
|
770
|
cannam@127
|
771 int nthr;
|
cannam@127
|
772 flags_t flags;
|
cannam@127
|
773
|
cannam@127
|
774 crude_time start_time;
|
cannam@127
|
775 double timelimit; /* elapsed_since(start_time) at which to bail out */
|
cannam@127
|
776 int timed_out; /* whether most recent search timed out */
|
cannam@127
|
777 int need_timeout_check;
|
cannam@127
|
778
|
cannam@127
|
779 /* various statistics */
|
cannam@127
|
780 int nplan; /* number of plans evaluated */
|
cannam@127
|
781 double pcost, epcost; /* total pcost of measured/estimated plans */
|
cannam@127
|
782 int nprob; /* number of problems evaluated */
|
cannam@127
|
783 };
|
cannam@127
|
784
|
cannam@127
|
785 planner *X(mkplanner)(void);
|
cannam@127
|
786 void X(planner_destroy)(planner *ego);
|
cannam@127
|
787
|
cannam@127
|
/*
  Iterate over all solvers.  Read:

  @article{ baker93iterators,
  author = "Henry G. Baker, Jr.",
  title = "Iterators: Signs of Weakness in Object-Oriented Languages",
  journal = "{ACM} {OOPS} Messenger",
  volume = "4",
  number = "3",
  pages = "18--25"
  }

  Arguments of both macros:
    ego   pointer to the planner (any pointer-valued expression)
    s, p  identifiers; the macro declares `solver *s' and `slvdesc *p'
          for the current solver and its descriptor
    what  statement executed once per solver
*/
#define FORALL_SOLVERS(ego, s, p, what)			\
{							\
     unsigned _cnt;					\
     for (_cnt = 0; _cnt < (ego)->nslvdesc; ++_cnt) {	\
	  slvdesc *p = (ego)->slvdescs + _cnt;		\
	  solver *s = p->slv;				\
	  what;						\
     }							\
}

/* Same, but only walks the chain of descriptors registered for problem
   kind `kind': starts at slvdescs_for_problem_kind[kind] and follows
   next_for_same_problem_kind until the index becomes negative. */
#define FORALL_SOLVERS_OF_KIND(kind, ego, s, p, what)		\
{								\
     int _cnt = (ego)->slvdescs_for_problem_kind[(kind)];	\
     while (_cnt >= 0) {					\
	  slvdesc *p = (ego)->slvdescs + _cnt;			\
	  solver *s = p->slv;					\
	  what;							\
	  _cnt = p->next_for_same_problem_kind;			\
     }								\
}

822 /* make plan, destroy problem */
|
cannam@127
|
823 plan *X(mkplan_d)(planner *ego, problem *p);
|
cannam@127
|
824 plan *X(mkplan_f_d)(planner *ego, problem *p,
|
cannam@127
|
825 unsigned l_set, unsigned u_set, unsigned u_reset);
|
cannam@127
|
826
|
cannam@127
|
827 /*-----------------------------------------------------------------------*/
|
cannam@127
|
828 /* stride.c: */
|
cannam@127
|
829
|
cannam@127
|
830 /* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. */
|
cannam@127
|
831 #if (defined(__i386__) || defined(__x86_64__) || _M_IX86 >= 500) && !defined(FFTW_LDOUBLE)
|
cannam@127
|
832 #define PRECOMPUTE_ARRAY_INDICES
|
cannam@127
|
833 #endif
|
cannam@127
|
834
|
cannam@127
|
835 extern const INT X(an_INT_guaranteed_to_be_zero);
|
cannam@127
|
836
|
cannam@127
|
837 #ifdef PRECOMPUTE_ARRAY_INDICES
|
cannam@127
|
838 typedef INT *stride;
|
cannam@127
|
839 #define WS(stride, i) (stride[i])
|
cannam@127
|
840 extern stride X(mkstride)(INT n, INT s);
|
cannam@127
|
841 void X(stride_destroy)(stride p);
|
cannam@127
|
842 /* hackery to prevent the compiler from copying the strides array
|
cannam@127
|
843 onto the stack */
|
cannam@127
|
844 #define MAKE_VOLATILE_STRIDE(nptr, x) (x) = (x) + X(an_INT_guaranteed_to_be_zero)
|
cannam@127
|
845 #else
|
cannam@127
|
846
|
cannam@127
|
847 typedef INT stride;
|
cannam@127
|
848 #define WS(stride, i) (stride * i)
|
cannam@127
|
849 #define fftwf_mkstride(n, stride) stride
|
cannam@127
|
850 #define fftw_mkstride(n, stride) stride
|
cannam@127
|
851 #define fftwl_mkstride(n, stride) stride
|
cannam@127
|
852 #define fftwf_stride_destroy(p) ((void) p)
|
cannam@127
|
853 #define fftw_stride_destroy(p) ((void) p)
|
cannam@127
|
854 #define fftwl_stride_destroy(p) ((void) p)
|
cannam@127
|
855
|
cannam@127
|
856 /* hackery to prevent the compiler from ``optimizing'' induction
|
cannam@127
|
857 variables in codelet loops. The problem is that for each K and for
|
cannam@127
|
858 each expression of the form P[I + STRIDE * K] in a loop, most
|
cannam@127
|
859 compilers will try to lift an induction variable PK := &P[I + STRIDE * K].
|
cannam@127
|
860 For large values of K this behavior overflows the
|
cannam@127
|
861 register set, which is likely worse than doing the index computation
|
cannam@127
|
862 in the first place.
|
cannam@127
|
863
|
cannam@127
|
864 If we guess that there are more than
|
cannam@127
|
865 ESTIMATED_AVAILABLE_INDEX_REGISTERS such pointers, we deliberately confuse
|
cannam@127
|
866 the compiler by setting STRIDE ^= ZERO, where ZERO is a value guaranteed to
|
cannam@127
|
867 be 0, but the compiler does not know this.
|
cannam@127
|
868
|
cannam@127
|
869 16 registers ought to be enough for anybody, or so the amd64 and ARM ISA's
|
cannam@127
|
870 seem to imply.
|
cannam@127
|
871 */
|
cannam@127
|
872 #define ESTIMATED_AVAILABLE_INDEX_REGISTERS 16
|
cannam@127
|
873 #define MAKE_VOLATILE_STRIDE(nptr, x) \
|
cannam@127
|
874 (nptr <= ESTIMATED_AVAILABLE_INDEX_REGISTERS ? \
|
cannam@127
|
875 0 : \
|
cannam@127
|
876 ((x) = (x) ^ X(an_INT_guaranteed_to_be_zero)))
|
cannam@127
|
877 #endif /* PRECOMPUTE_ARRAY_INDICES */
|
cannam@127
|
878
|
cannam@127
|
879 /*-----------------------------------------------------------------------*/
|
cannam@127
|
880 /* solvtab.c */
|
cannam@127
|
881
|
cannam@127
|
882 struct solvtab_s { void (*reg)(planner *); const char *reg_nam; };
|
cannam@127
|
883 typedef struct solvtab_s solvtab[];
|
cannam@127
|
884 void X(solvtab_exec)(const solvtab tbl, planner *p);
|
cannam@127
|
885 #define SOLVTAB(s) { s, STRINGIZE(s) }
|
cannam@127
|
886 #define SOLVTAB_END { 0, 0 }
|
cannam@127
|
887
|
cannam@127
|
888 /*-----------------------------------------------------------------------*/
|
cannam@127
|
889 /* pickdim.c */
|
cannam@127
|
890 int X(pickdim)(int which_dim, const int *buddies, size_t nbuddies,
|
cannam@127
|
891 const tensor *sz, int oop, int *dp);
|
cannam@127
|
892
|
cannam@127
|
/*-----------------------------------------------------------------------*/
/* twiddle.c */
/* little language to express twiddle factors computation */
enum {
     TW_COS = 0,
     TW_SIN = 1,
     TW_CEXP = 2,
     TW_NEXT = 3,
     TW_FULL = 4,
     TW_HALF = 5
};

/* one instruction of the twiddle language */
typedef struct {
     unsigned char op;
     signed char v;
     short i;
} tw_instr;

905 typedef struct twid_s {
|
cannam@127
|
906 R *W; /* array of twiddle factors */
|
cannam@127
|
907 INT n, r, m; /* transform order, radix, # twiddle rows */
|
cannam@127
|
908 int refcnt;
|
cannam@127
|
909 const tw_instr *instr;
|
cannam@127
|
910 struct twid_s *cdr;
|
cannam@127
|
911 enum wakefulness wakefulness;
|
cannam@127
|
912 } twid;
|
cannam@127
|
913
|
cannam@127
|
914 INT X(twiddle_length)(INT r, const tw_instr *p);
|
cannam@127
|
915 void X(twiddle_awake)(enum wakefulness wakefulness,
|
cannam@127
|
916 twid **pp, const tw_instr *instr, INT n, INT r, INT m);
|
cannam@127
|
917
|
cannam@127
|
/*-----------------------------------------------------------------------*/
/* trig.c */
/* trigreal: floating-point type selected by the TRIGREAL_IS_* macros;
   double when neither is defined */
#if defined(TRIGREAL_IS_LONG_DOUBLE)
typedef long double trigreal;
#elif defined(TRIGREAL_IS_QUAD)
typedef __float128 trigreal;
#else
typedef double trigreal;
#endif

928 typedef struct triggen_s triggen;
|
cannam@127
|
929
|
cannam@127
|
930 struct triggen_s {
|
cannam@127
|
931 void (*cexp)(triggen *t, INT m, R *result);
|
cannam@127
|
932 void (*cexpl)(triggen *t, INT m, trigreal *result);
|
cannam@127
|
933 void (*rotate)(triggen *p, INT m, R xr, R xi, R *res);
|
cannam@127
|
934
|
cannam@127
|
935 INT twshft;
|
cannam@127
|
936 INT twradix;
|
cannam@127
|
937 INT twmsk;
|
cannam@127
|
938 trigreal *W0, *W1;
|
cannam@127
|
939 INT n;
|
cannam@127
|
940 };
|
cannam@127
|
941
|
cannam@127
|
942 triggen *X(mktriggen)(enum wakefulness wakefulness, INT n);
|
cannam@127
|
943 void X(triggen_destroy)(triggen *p);
|
cannam@127
|
944
|
cannam@127
|
945 /*-----------------------------------------------------------------------*/
|
cannam@127
|
946 /* primes.c: */
|
cannam@127
|
947
|
cannam@127
|
948 #define MULMOD(x, y, p) \
|
cannam@127
|
949 (((x) <= 92681 - (y)) ? ((x) * (y)) % (p) : X(safe_mulmod)(x, y, p))
|
cannam@127
|
950
|
cannam@127
|
951 INT X(safe_mulmod)(INT x, INT y, INT p);
|
cannam@127
|
952 INT X(power_mod)(INT n, INT m, INT p);
|
cannam@127
|
953 INT X(find_generator)(INT p);
|
cannam@127
|
954 INT X(first_divisor)(INT n);
|
cannam@127
|
955 int X(is_prime)(INT n);
|
cannam@127
|
956 INT X(next_prime)(INT n);
|
cannam@127
|
957 int X(factors_into)(INT n, const INT *primes);
|
cannam@127
|
958 int X(factors_into_small_primes)(INT n);
|
cannam@127
|
959 INT X(choose_radix)(INT r, INT n);
|
cannam@127
|
960 INT X(isqrt)(INT n);
|
cannam@127
|
961 INT X(modulo)(INT a, INT n);
|
cannam@127
|
962
|
cannam@127
|
#define GENERIC_MIN_BAD 173 /* min prime for which generic becomes bad */

/* thresholds below which certain solvers are considered SLOW.  These are guesses
   believed to be conservative */
#define GENERIC_MAX_SLOW 16
#define RADER_MAX_SLOW 32
#define BLUESTEIN_MAX_SLOW 24

971 /*-----------------------------------------------------------------------*/
|
cannam@127
|
972 /* rader.c: */
|
cannam@127
|
973 typedef struct rader_tls rader_tl;
|
cannam@127
|
974
|
cannam@127
|
975 void X(rader_tl_insert)(INT k1, INT k2, INT k3, R *W, rader_tl **tl);
|
cannam@127
|
976 R *X(rader_tl_find)(INT k1, INT k2, INT k3, rader_tl *t);
|
cannam@127
|
977 void X(rader_tl_delete)(R *W, rader_tl **tl);
|
cannam@127
|
978
|
cannam@127
|
979 /*-----------------------------------------------------------------------*/
|
cannam@127
|
980 /* copy/transposition routines */
|
cannam@127
|
981
|
cannam@127
|
982 /* lower bound to the cache size, for tiled routines */
|
cannam@127
|
983 #define CACHESIZE 8192
|
cannam@127
|
984
|
cannam@127
|
985 INT X(compute_tilesz)(INT vl, int how_many_tiles_in_cache);
|
cannam@127
|
986
|
cannam@127
|
987 void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz,
|
cannam@127
|
988 void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args),
|
cannam@127
|
989 void *args);
|
cannam@127
|
990 void X(cpy1d)(R *I, R *O, INT n0, INT is0, INT os0, INT vl);
|
cannam@127
|
991 void X(zero1d_pair)(R *O0, R *O1, INT n0, INT os0);
|
cannam@127
|
992 void X(cpy2d)(R *I, R *O,
|
cannam@127
|
993 INT n0, INT is0, INT os0,
|
cannam@127
|
994 INT n1, INT is1, INT os1,
|
cannam@127
|
995 INT vl);
|
cannam@127
|
996 void X(cpy2d_ci)(R *I, R *O,
|
cannam@127
|
997 INT n0, INT is0, INT os0,
|
cannam@127
|
998 INT n1, INT is1, INT os1,
|
cannam@127
|
999 INT vl);
|
cannam@127
|
1000 void X(cpy2d_co)(R *I, R *O,
|
cannam@127
|
1001 INT n0, INT is0, INT os0,
|
cannam@127
|
1002 INT n1, INT is1, INT os1,
|
cannam@127
|
1003 INT vl);
|
cannam@127
|
1004 void X(cpy2d_tiled)(R *I, R *O,
|
cannam@127
|
1005 INT n0, INT is0, INT os0,
|
cannam@127
|
1006 INT n1, INT is1, INT os1,
|
cannam@127
|
1007 INT vl);
|
cannam@127
|
1008 void X(cpy2d_tiledbuf)(R *I, R *O,
|
cannam@127
|
1009 INT n0, INT is0, INT os0,
|
cannam@127
|
1010 INT n1, INT is1, INT os1,
|
cannam@127
|
1011 INT vl);
|
cannam@127
|
1012 void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1,
|
cannam@127
|
1013 INT n0, INT is0, INT os0,
|
cannam@127
|
1014 INT n1, INT is1, INT os1);
|
cannam@127
|
1015 void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1,
|
cannam@127
|
1016 INT n0, INT is0, INT os0,
|
cannam@127
|
1017 INT n1, INT is1, INT os1);
|
cannam@127
|
1018 void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1,
|
cannam@127
|
1019 INT n0, INT is0, INT os0,
|
cannam@127
|
1020 INT n1, INT is1, INT os1);
|
cannam@127
|
1021
|
cannam@127
|
1022 void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl);
|
cannam@127
|
1023 void X(transpose_tiled)(R *I, INT n, INT s0, INT s1, INT vl);
|
cannam@127
|
1024 void X(transpose_tiledbuf)(R *I, INT n, INT s0, INT s1, INT vl);
|
cannam@127
|
1025
|
cannam@127
|
1026 typedef void (*transpose_func)(R *I, INT n, INT s0, INT s1, INT vl);
|
cannam@127
|
1027 typedef void (*cpy2d_func)(R *I, R *O,
|
cannam@127
|
1028 INT n0, INT is0, INT os0,
|
cannam@127
|
1029 INT n1, INT is1, INT os1,
|
cannam@127
|
1030 INT vl);
|
cannam@127
|
1031
|
cannam@127
|
1032 /*-----------------------------------------------------------------------*/
|
cannam@127
|
1033 /* misc stuff */
|
cannam@127
|
1034 void X(null_awake)(plan *ego, enum wakefulness wakefulness);
|
cannam@127
|
1035 double X(iestimate_cost)(const planner *, const plan *, const problem *);
|
cannam@127
|
1036
|
cannam@127
|
1037 #ifdef FFTW_RANDOM_ESTIMATOR
|
cannam@127
|
1038 extern unsigned X(random_estimate_seed);
|
cannam@127
|
1039 #endif
|
cannam@127
|
1040
|
cannam@127
|
1041 double X(measure_execution_time)(const planner *plnr,
|
cannam@127
|
1042 plan *pln, const problem *p);
|
cannam@127
|
1043 IFFTW_EXTERN int X(ialignment_of)(R *p);
|
cannam@127
|
1044 unsigned X(hash)(const char *s);
|
cannam@127
|
1045 INT X(nbuf)(INT n, INT vl, INT maxnbuf);
|
cannam@127
|
1046 int X(nbuf_redundant)(INT n, INT vl, size_t which,
|
cannam@127
|
1047 const INT *maxnbuf, size_t nmaxnbuf);
|
cannam@127
|
1048 INT X(bufdist)(INT n, INT vl);
|
cannam@127
|
1049 int X(toobig)(INT n);
|
cannam@127
|
1050 int X(ct_uglyp)(INT min_n, INT v, INT n, INT r);
|
cannam@127
|
1051
|
cannam@127
|
/* Pointer tainting.  With HAVE_SIMD the taint occupies the two low bits
   of the pointer value (mask 3): UNTAINT clears them and TAINTOF
   extracts them.  Without HAVE_SIMD every macro is a no-op. */
#if HAVE_SIMD
R *X(taint)(R *p, INT s);
R *X(join_taint)(R *p1, R *p2);
#define TAINT(p, s) X(taint)(p, s)
#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3))
#define TAINTOF(p) (((uintptr_t)(p)) & 3)
#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2)
#else
#define TAINT(p, s) (p)
#define UNTAINT(p) (p)
#define TAINTOF(p) 0
#define JOIN_TAINT(p1, p2) (p1)
#endif

/* With FFTW_DEBUG_ALIGNMENT: declare a local double and check with CK()
   that its address has the low three bits clear.  Otherwise expands to
   nothing. */
#ifdef FFTW_DEBUG_ALIGNMENT
#  define ASSERT_ALIGNED_DOUBLE {				\
     double fftw_alignment_probe_;				\
     CK(!(((uintptr_t) &fftw_alignment_probe_) & 0x7));	\
}
#else
#  define ASSERT_ALIGNED_DOUBLE
#endif /* FFTW_DEBUG_ALIGNMENT */

1077 /*-----------------------------------------------------------------------*/
|
cannam@127
|
1078 /* macros used in codelets to reduce source code size */
|
cannam@127
|
1079
|
cannam@127
|
1080 typedef R E; /* internal precision of codelets. */
|
cannam@127
|
1081
|
cannam@127
|
1082 #if defined(FFTW_LDOUBLE)
|
cannam@127
|
1083 # define K(x) ((E) x##L)
|
cannam@127
|
1084 #elif defined(FFTW_QUAD)
|
cannam@127
|
1085 # define K(x) ((E) x##Q)
|
cannam@127
|
1086 #else
|
cannam@127
|
1087 # define K(x) ((E) x)
|
cannam@127
|
1088 #endif
|
cannam@127
|
1089 #define DK(name, value) const E name = K(value)
|
cannam@127
|
1090
|
cannam@127
|
/* FMA macros:
     FMA(a, b, c)  =   a * b + c
     FMS(a, b, c)  =   a * b - c
     FNMA(a, b, c) = -(a * b + c)
     FNMS(a, b, c) =   c - a * b
*/

#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__) || defined(_POWER))
/* The obvious expression a * b + c does not work.  If both x = a * b
   + c and y = a * b - c appear in the source, gcc computes t = a * b,
   x = t + c, y = t - c, thus destroying the fma.

   This peculiar coding seems to do the right thing on all of
   gcc-2.95, gcc-3.1, gcc-3.2, and gcc-3.3.  It does the right thing
   on gcc-3.4 -fno-web (because the ``web'' pass splits the variable
   `x' for the single-assignment form).

   However, gcc-4.0 is a formidable adversary which succeeds in
   pessimizing two fma's into one multiplication and two additions.
   It does it very early in the game---before the optimization passes
   even start.  The only real workaround seems to use fake inline asm
   such as

     asm ("# confuse gcc %0" : "=f"(a) : "0"(a));
     return a * b + c;

   in each of the FMA, FMS, FNMA, and FNMS functions.  However, this
   does not solve the problem either, because two equal asm statements
   count as a common subexpression!  One must use *different* fake asm
   statements:

   in FMA:
     asm ("# confuse gcc for fma %0" : "=f"(a) : "0"(a));

   in FMS:
     asm ("# confuse gcc for fms %0" : "=f"(a) : "0"(a));

   etc.

   After these changes, gcc recalcitrantly generates the fma that was
   in the source to begin with.  However, the extra asm() cruft
   confuses other passes of gcc, notably the instruction scheduler.
   (Of course, one could also generate the fma directly via inline
   asm, but this confuses the scheduler even more.)

   Steven and I have submitted more than one bug report to the gcc
   mailing list over the past few years, to no effect.  Thus, I give
   up.  gcc-4.0 can go to hell.  I'll wait at least until gcc-4.3 is
   out before touching this crap again.
*/
static __inline__ E FMA(E a, E b, E c)
{
     E x = a * b;
     x = x + c;
     return x;
}

static __inline__ E FMS(E a, E b, E c)
{
     E x = a * b;
     x = x - c;
     return x;
}

static __inline__ E FNMA(E a, E b, E c)
{
     E x = a * b;
     x = - (x + c);
     return x;
}

static __inline__ E FNMS(E a, E b, E c)
{
     E x = a * b;
     x = - (x - c);
     return x;
}
#else
#define FMA(a, b, c) (((a) * (b)) + (c))
#define FMS(a, b, c) (((a) * (b)) - (c))
#define FNMA(a, b, c) (- (((a) * (b)) + (c)))
#define FNMS(a, b, c) ((c) - ((a) * (b)))
#endif

1170 #ifdef __cplusplus
|
cannam@127
|
1171 } /* extern "C" */
|
cannam@127
|
1172 #endif /* __cplusplus */
|
cannam@127
|
1173
|
cannam@127
|
1174 #endif /* __IFFTW_H__ */
|