Revision 167:bd3cc4d1df30

View differences:

src/fftw-3.3.8/AUTHORS
1
Authors of FFTW (reachable at fftw@fftw.org):
2

  
3
Matteo Frigo <athena@fftw.org>
4
Steven G. Johnson <stevenj@alum.mit.edu>
5

  
6
Stefan Kral <skral@fftw.org> wrote genfft-k7/*.ml*, which was
7
added in fftw-3.0 and removed in fftw-3.2.
8

  
9
Romain Dolbeau contributed support for AVX512 and KCvi.
10

  
11
Erik Lindahl contributed support for AVX2 and Power8 VSX.
12

  
13
Support for the Cell Broadband Engine was graciously donated by the
14
IBM Austin Research Lab, which was added in fftw-3.2 and removed in
15
fftw-3.3.
16

  
17
Support for MIPS64 paired-single SIMD instructions was graciously
18
donated by CodeSourcery, Inc.
src/fftw-3.3.8/CMakeLists.txt
1
cmake_minimum_required (VERSION 3.0)
2

  
3
if (NOT DEFINED CMAKE_BUILD_TYPE)
4
  set (CMAKE_BUILD_TYPE Release CACHE STRING "Build type")
5
endif ()
6

  
7
project (fftw)
8

  
9
if (POLICY CMP0042)
10
  cmake_policy (SET CMP0042 NEW)
11
endif ()
12

  
13
option (BUILD_SHARED_LIBS "Build shared libraries" ON)
14
option (BUILD_TESTS "Build tests" ON)
15

  
16
option (ENABLE_OPENMP "Use OpenMP for multithreading" OFF)
17
option (ENABLE_THREADS "Use pthread for multithreading" OFF)
18
option (WITH_COMBINED_THREADS "Merge thread library" OFF)
19

  
20
option (ENABLE_FLOAT "single-precision" OFF)
21
option (ENABLE_LONG_DOUBLE "long-double precision" OFF)
22
option (ENABLE_QUAD_PRECISION "quadruple-precision" OFF)
23

  
24
option (ENABLE_SSE "Compile with SSE instruction set support" OFF)
25
option (ENABLE_SSE2 "Compile with SSE2 instruction set support" OFF)
26
option (ENABLE_AVX "Compile with AVX instruction set support" OFF)
27
option (ENABLE_AVX2 "Compile with AVX2 instruction set support" OFF)
28

  
29
option (DISABLE_FORTRAN "Disable Fortran wrapper routines" OFF)
30

  
31
include(GNUInstallDirs)
32

  
33

  
34
include (CheckIncludeFile)
35
check_include_file (alloca.h         HAVE_ALLOCA_H)
36
check_include_file (altivec.h        HAVE_ALTIVEC_H)
37
check_include_file (c_asm.h          HAVE_C_ASM_H)
38
check_include_file (dlfcn.h          HAVE_DLFCN_H)
39
check_include_file (intrinsics.h     HAVE_INTRINSICS_H)
40
check_include_file (inttypes.h       HAVE_INTTYPES_H)
41
check_include_file (libintl.h        HAVE_LIBINTL_H)
42
check_include_file (limits.h         HAVE_LIMITS_H)
43
check_include_file (mach/mach_time.h HAVE_MACH_MACH_TIME_H)
44
check_include_file (malloc.h         HAVE_MALLOC_H)
45
check_include_file (memory.h         HAVE_MEMORY_H)
46
check_include_file (stddef.h         HAVE_STDDEF_H)
47
check_include_file (stdint.h         HAVE_STDINT_H)
48
check_include_file (stdlib.h         HAVE_STDLIB_H)
49
check_include_file (string.h         HAVE_STRING_H)
50
check_include_file (strings.h        HAVE_STRINGS_H)
51
check_include_file (sys/types.h      HAVE_SYS_TYPES_H)
52
check_include_file (sys/time.h       HAVE_SYS_TIME_H)
53
check_include_file (sys/stat.h       HAVE_SYS_STAT_H)
54
check_include_file (sys/sysctl.h     HAVE_SYS_SYSCTL_H)
55
check_include_file (time.h           HAVE_TIME_H)
56
check_include_file (uintptr.h        HAVE_UINTPTR_H)
57
check_include_file (unistd.h         HAVE_UNISTD_H)
58
if (HAVE_TIME_H AND HAVE_SYS_TIME_H)
59
  set (TIME_WITH_SYS_TIME TRUE)
60
endif ()
61

  
62
include (CheckPrototypeDefinition) 
63
check_prototype_definition (drand48 "double drand48 (void)" "0" stdlib.h HAVE_DECL_DRAND48)
64
check_prototype_definition (srand48 "void srand48(long int seedval)" "0" stdlib.h HAVE_DECL_SRAND48)
65
check_prototype_definition (cosl "long double cosl( long double arg )" "0" math.h HAVE_DECL_COSL)
66
check_prototype_definition (sinl "long double sinl( long double arg )" "0" math.h HAVE_DECL_SINL)
67
check_prototype_definition (memalign "void *memalign(size_t alignment, size_t size)" "0" malloc.h HAVE_DECL_MEMALIGN)
68
check_prototype_definition (posix_memalign "int posix_memalign(void **memptr, size_t alignment, size_t size)" "0" stdlib.h HAVE_DECL_POSIX_MEMALIGN)
69

  
70
include (CheckSymbolExists)
71
check_symbol_exists (clock_gettime time.h HAVE_CLOCK_GETTIME)
72
check_symbol_exists (gettimeofday sys/time.h HAVE_GETTIMEOFDAY)
73
check_symbol_exists (getpagesize unistd.h HAVE_GETPAGESIZE)
74
check_symbol_exists (drand48 stdlib.h HAVE_DRAND48)
75
check_symbol_exists (srand48 stdlib.h HAVE_SRAND48)
76
check_symbol_exists (memalign malloc.h HAVE_MEMALIGN)
77
check_symbol_exists (posix_memalign stdlib.h HAVE_POSIX_MEMALIGN)
78
check_symbol_exists (mach_absolute_time mach/mach_time.h HAVE_MACH_ABSOLUTE_TIME)
79
check_symbol_exists (alloca alloca.h HAVE_ALLOCA)
80
if (NOT HAVE_ALLOCA)
81
  unset (HAVE_ALLOCA CACHE)
82
  check_symbol_exists (alloca malloc.h HAVE_ALLOCA)
83
endif ()
84
check_symbol_exists (isnan math.h HAVE_ISNAN)
85
check_symbol_exists (snprintf stdio.h HAVE_SNPRINTF)
86
check_symbol_exists (strchr string.h HAVE_STRCHR)
87
check_symbol_exists (sysctl unistd.h HAVE_SYSCTL)
88

  
89
if (UNIX)
90
  set (CMAKE_REQUIRED_LIBRARIES m)
91
endif ()
92
check_symbol_exists (cosl math.h HAVE_COSL)
93
check_symbol_exists (sinl math.h HAVE_SINL)
94

  
95
include (CheckTypeSize)
96
check_type_size ("float" SIZEOF_FLOAT)
97
check_type_size ("double" SIZEOF_DOUBLE)
98
check_type_size ("int" SIZEOF_INT)
99
check_type_size ("long" SIZEOF_LONG)
100
check_type_size ("long long" SIZEOF_LONG_LONG)
101
check_type_size ("unsigned int" SIZEOF_UNSIGNED_INT)
102
check_type_size ("unsigned long" SIZEOF_UNSIGNED_LONG)
103
check_type_size ("unsigned long long" SIZEOF_UNSIGNED_LONG_LONG)
104
check_type_size ("size_t" SIZEOF_SIZE_T)
105
check_type_size ("ptrdiff_t" SIZEOF_PTRDIFF_T)
106
math (EXPR SIZEOF_INT_BITS "8 * ${SIZEOF_INT}")
107
set (C_FFTW_R2R_KIND "C_INT${SIZEOF_INT_BITS}_T")
108

  
109
find_library (LIBM_LIBRARY NAMES m)
110
if (LIBM_LIBRARY)
111
  set (HAVE_LIBM TRUE)
112
endif ()
113

  
114

  
115
if (ENABLE_THREADS)
116
  find_package (Threads)
117
endif ()
118
if (Threads_FOUND)
119
  if(CMAKE_USE_PTHREADS_INIT)
120
    set (USING_POSIX_THREADS 1)
121
  endif ()
122
  set (HAVE_THREADS TRUE)
123
endif ()
124

  
125
if (ENABLE_OPENMP)
126
  find_package (OpenMP)
127
endif ()
128
if (OPENMP_FOUND)
129
  set (HAVE_OPENMP TRUE)
130
endif ()
131

  
132
include (CheckCCompilerFlag)
133

  
134
if (ENABLE_SSE)
135
  foreach (FLAG "-msse" "/arch:SSE")
136
    unset (HAVE_SSE CACHE)
137
    check_c_compiler_flag (${FLAG} HAVE_SSE)
138
    if (HAVE_SSE)
139
      set (SSE_FLAG ${FLAG})
140
      break()
141
    endif ()
142
  endforeach ()
143
endif ()
144

  
145
if (ENABLE_SSE2)
146
  foreach (FLAG "-msse2" "/arch:SSE2")
147
    unset (HAVE_SSE2 CACHE)
148
    check_c_compiler_flag (${FLAG} HAVE_SSE2)
149
    if (HAVE_SSE2)
150
      set (SSE2_FLAG ${FLAG})
151
      break()
152
    endif ()
153
  endforeach ()
154
endif ()
155

  
156
if (ENABLE_AVX)
157
  foreach (FLAG "-mavx" "/arch:AVX")
158
    unset (HAVE_AVX CACHE)
159
    check_c_compiler_flag (${FLAG} HAVE_AVX)
160
    if (HAVE_AVX)
161
      set (AVX_FLAG ${FLAG})
162
      break()
163
    endif ()
164
  endforeach ()
165
endif ()
166

  
167
if (ENABLE_AVX2)
168
  foreach (FLAG "-mavx2" "/arch:AVX2")
169
    unset (HAVE_AVX2 CACHE)
170
    check_c_compiler_flag (${FLAG} HAVE_AVX2)
171
    if (HAVE_AVX2)
172
      set (AVX2_FLAG ${FLAG})
173
      break()
174
    endif ()
175
  endforeach ()
176
endif ()
177

  
178
# AVX2 codelets require FMA support as well
179
if (ENABLE_AVX2)
180
  foreach (FLAG "-mfma" "/arch:FMA")
181
    unset (HAVE_FMA CACHE)
182
    check_c_compiler_flag (${FLAG} HAVE_FMA)
183
    if (HAVE_FMA)
184
      set (FMA_FLAG ${FLAG})
185
      break()
186
    endif ()
187
  endforeach ()
188
endif ()
189

  
190
if (HAVE_SSE2 OR HAVE_AVX)
191
  set (HAVE_SIMD TRUE)
192
endif ()
193
file(GLOB           fftw_api_SOURCE                 api/*.c             api/*.h)
194
file(GLOB           fftw_dft_SOURCE                 dft/*.c             dft/*.h)
195
file(GLOB           fftw_dft_scalar_SOURCE          dft/scalar/*.c      dft/scalar/*.h)
196
file(GLOB           fftw_dft_scalar_codelets_SOURCE dft/scalar/codelets/*.c     dft/scalar/codelets/*.h)
197
file(GLOB           fftw_dft_simd_SOURCE            dft/simd/*.c        dft/simd/*.h)
198

  
199
file(GLOB           fftw_dft_simd_sse2_SOURCE       dft/simd/sse2/*.c   dft/simd/sse2/*.h)
200
file(GLOB           fftw_dft_simd_avx_SOURCE        dft/simd/avx/*.c    dft/simd/avx/*.h)
201
file(GLOB           fftw_dft_simd_avx2_SOURCE       dft/simd/avx2/*.c   dft/simd/avx2/*.h dft/simd/avx2-128/*.c   dft/simd/avx2-128/*.h)
202
file(GLOB           fftw_kernel_SOURCE              kernel/*.c          kernel/*.h)
203
file(GLOB           fftw_rdft_SOURCE                rdft/*.c            rdft/*.h)
204
file(GLOB           fftw_rdft_scalar_SOURCE         rdft/scalar/*.c     rdft/scalar/*.h)
205

  
206
file(GLOB           fftw_rdft_scalar_r2cb_SOURCE    rdft/scalar/r2cb/*.c
207
                                                    rdft/scalar/r2cb/*.h)
208
file(GLOB           fftw_rdft_scalar_r2cf_SOURCE    rdft/scalar/r2cf/*.c
209
                                                    rdft/scalar/r2cf/*.h)
210
file(GLOB           fftw_rdft_scalar_r2r_SOURCE     rdft/scalar/r2r/*.c
211
                                                    rdft/scalar/r2r/*.h)
212

  
213
file(GLOB           fftw_rdft_simd_SOURCE           rdft/simd/*.c       rdft/simd/*.h)
214
file(GLOB           fftw_rdft_simd_sse2_SOURCE      rdft/simd/sse2/*.c  rdft/simd/sse2/*.h)
215
file(GLOB           fftw_rdft_simd_avx_SOURCE       rdft/simd/avx/*.c   rdft/simd/avx/*.h)
216
file(GLOB           fftw_rdft_simd_avx2_SOURCE      rdft/simd/avx2/*.c  rdft/simd/avx2/*.h rdft/simd/avx2-128/*.c  rdft/simd/avx2-128/*.h)
217

  
218
file(GLOB           fftw_reodft_SOURCE              reodft/*.c          reodft/*.h)
219
file(GLOB           fftw_simd_support_SOURCE        simd-support/*.c    simd-support/*.h)
220
file(GLOB           fftw_libbench2_SOURCE           libbench2/*.c       libbench2/*.h)
221
list (REMOVE_ITEM   fftw_libbench2_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/libbench2/useropt.c)
222

  
223
set(SOURCEFILES
224
    ${fftw_api_SOURCE}
225
    ${fftw_dft_SOURCE}
226
    ${fftw_dft_scalar_SOURCE}
227
    ${fftw_dft_scalar_codelets_SOURCE}
228
    ${fftw_dft_simd_SOURCE}
229
    ${fftw_kernel_SOURCE}
230
    ${fftw_rdft_SOURCE}
231
    ${fftw_rdft_scalar_SOURCE}
232

  
233
    ${fftw_rdft_scalar_r2cb_SOURCE}
234
    ${fftw_rdft_scalar_r2cf_SOURCE}
235
    ${fftw_rdft_scalar_r2r_SOURCE}
236

  
237
    ${fftw_rdft_simd_SOURCE}
238
    ${fftw_reodft_SOURCE}
239
    ${fftw_simd_support_SOURCE}
240
    ${fftw_threads_SOURCE}
241
)
242

  
243
set(fftw_par_SOURCE
244
    threads/api.c
245
    threads/conf.c
246
    threads/ct.c
247
    threads/dft-vrank-geq1.c
248
    threads/f77api.c
249
    threads/hc2hc.c
250
    threads/rdft-vrank-geq1.c
251
    threads/vrank-geq1-rdft2.c)
252

  
253
set (fftw_threads_SOURCE ${fftw_par_SOURCE} threads/threads.c)
254
set (fftw_omp_SOURCE ${fftw_par_SOURCE} threads/openmp.c)
255

  
256

  
257
include_directories (.)
258

  
259

  
260
if (WITH_COMBINED_THREADS)
261
  list (APPEND SOURCEFILES ${fftw_threads_SOURCE})
262
endif ()
263

  
264

  
265
if (HAVE_SSE2)
266
  list (APPEND SOURCEFILES ${fftw_dft_simd_sse2_SOURCE} ${fftw_rdft_simd_sse2_SOURCE})
267
endif ()
268

  
269
if (HAVE_AVX)
270
  list (APPEND SOURCEFILES ${fftw_dft_simd_avx_SOURCE} ${fftw_rdft_simd_avx_SOURCE})
271
endif ()
272

  
273
if (HAVE_AVX2)
274
  list (APPEND SOURCEFILES ${fftw_dft_simd_avx2_SOURCE} ${fftw_rdft_simd_avx2_SOURCE})
275
endif ()
276

  
277
set (FFTW_VERSION 3.3.8)  # must match the release tree (fftw-3.3.8); later exported as VERSION for the pkg-config template
278

  
279
set (PREC_SUFFIX)
280
if (ENABLE_FLOAT)
281
  set (FFTW_SINGLE TRUE)
282
  set (BENCHFFT_SINGLE TRUE)
283
  set (PREC_SUFFIX f)
284
endif ()
285

  
286
if (ENABLE_LONG_DOUBLE)
287
  set (FFTW_LDOUBLE TRUE)
288
  set (BENCHFFT_LDOUBLE TRUE)
289
  set (PREC_SUFFIX l)
290
endif ()
291

  
292
if (ENABLE_QUAD_PRECISION)
293
  set (FFTW_QUAD TRUE)
294
  set (BENCHFFT_QUAD TRUE)
295
  set (PREC_SUFFIX q)
296
endif ()
297
set (fftw3_lib fftw3${PREC_SUFFIX})
298

  
299
configure_file (cmake.config.h.in config.h @ONLY)
300
include_directories (${CMAKE_CURRENT_BINARY_DIR})
301

  
302
if (BUILD_SHARED_LIBS)
303
  add_definitions (-DFFTW_DLL)
304
endif ()
305

  
306
add_library (${fftw3_lib} ${SOURCEFILES})
307
target_include_directories (${fftw3_lib} INTERFACE $<INSTALL_INTERFACE:include>)
308
if (MSVC)
309
  target_compile_options (${fftw3_lib} PRIVATE /bigobj)  # /bigobj is a compiler switch, not a preprocessor definition
310
endif ()
311
if (HAVE_SSE)
312
  target_compile_options (${fftw3_lib} PRIVATE ${SSE_FLAG})
313
endif ()
314
if (HAVE_SSE2)
315
  target_compile_options (${fftw3_lib} PRIVATE ${SSE2_FLAG})
316
endif ()
317
if (HAVE_AVX)
318
  target_compile_options (${fftw3_lib} PRIVATE ${AVX_FLAG})
319
endif ()
320
if (HAVE_AVX2)
321
  target_compile_options (${fftw3_lib} PRIVATE ${AVX2_FLAG})
322
endif ()
323
if (HAVE_FMA)
324
  target_compile_options (${fftw3_lib} PRIVATE ${FMA_FLAG})
325
endif ()
326
if (HAVE_LIBM)
327
  target_link_libraries (${fftw3_lib} m)
328
endif ()
329

  
330
set (subtargets ${fftw3_lib})
331

  
332
if (Threads_FOUND)
333
  if (WITH_COMBINED_THREADS)
334
    target_link_libraries (${fftw3_lib} ${CMAKE_THREAD_LIBS_INIT})
335
  else ()
336
    add_library (${fftw3_lib}_threads ${fftw_threads_SOURCE})
337
    target_include_directories (${fftw3_lib}_threads INTERFACE $<INSTALL_INTERFACE:include>)
338
    target_link_libraries (${fftw3_lib}_threads ${fftw3_lib})
339
    target_link_libraries (${fftw3_lib}_threads ${CMAKE_THREAD_LIBS_INIT})
340
    list (APPEND subtargets ${fftw3_lib}_threads)
341
  endif ()
342
endif ()
343

  
344
if (OPENMP_FOUND)
345
  add_library (${fftw3_lib}_omp ${fftw_omp_SOURCE})
346
  target_include_directories (${fftw3_lib}_omp INTERFACE $<INSTALL_INTERFACE:include>)
347
  target_link_libraries (${fftw3_lib}_omp ${fftw3_lib})
348
  target_link_libraries (${fftw3_lib}_omp ${CMAKE_THREAD_LIBS_INIT})
349
  list (APPEND subtargets ${fftw3_lib}_omp)
350
  target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS})
351
endif ()
352

  
353
foreach(subtarget ${subtargets})
354
  set_target_properties (${subtarget} PROPERTIES SOVERSION 3.5.7 VERSION 3)
355
  install (TARGETS ${subtarget}
356
	  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
357
	  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
358
          ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
359
endforeach ()
360
install(TARGETS ${fftw3_lib}
361
          EXPORT FFTW3LibraryDepends
362
          RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
363
          LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
364
          ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
365

  
366
install (FILES api/fftw3.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
367
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f")  # current dir, not CMAKE_SOURCE_DIR, so this also works under add_subdirectory()
368
  install (FILES api/fftw3.f api/fftw3l.f03 api/fftw3q.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
369
endif ()
370
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f03.in")  # generate fftw3.f03 in the build tree from the template
371
  file (READ api/fftw3.f03.in FFTW3_F03_IN OFFSET 42)  # skip the first 42 bytes of the template; a fresh header is written below
372
  file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "! Generated automatically.  DO NOT EDIT!\n\n")
373
  file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "  integer, parameter :: C_FFTW_R2R_KIND = ${C_FFTW_R2R_KIND}\n\n")
374
  file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "${FFTW3_F03_IN}")
375
  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
376
endif ()
377

  
378
if (BUILD_TESTS)
379

  
380
  add_executable (bench ${fftw_libbench2_SOURCE} tests/bench.c tests/hook.c tests/fftw-bench.c)
381

  
382
  if (Threads_FOUND AND NOT WITH_COMBINED_THREADS)  # same condition that creates the separate ${fftw3_lib}_threads target
383
    target_link_libraries (bench ${fftw3_lib}_threads)
384
  else ()
385
    target_link_libraries (bench ${fftw3_lib})
386
  endif ()
387

  
388

  
389
  enable_testing ()
390

  
391
  if (Threads_FOUND)
392

  
393
    function (fftw_add_test problem)  # registers one CTest case that runs "bench -s <problem>"
394
      add_test (NAME ${problem} COMMAND bench -s ${problem})
395
    endfunction ()
396

  
397
    fftw_add_test (32x64)
398
    fftw_add_test (ib256)
399

  
400
  endif ()
401
endif ()
402

  
403
# pkgconfig file
404
set (prefix ${CMAKE_INSTALL_PREFIX})
405
set (exec_prefix ${CMAKE_INSTALL_PREFIX})
406
set (libdir ${CMAKE_INSTALL_FULL_LIBDIR})
407
set (includedir ${CMAKE_INSTALL_FULL_INCLUDEDIR})
408
set (VERSION ${FFTW_VERSION})
409
configure_file (fftw.pc.in fftw${PREC_SUFFIX}.pc @ONLY)
410
install (FILES
411
          ${CMAKE_CURRENT_BINARY_DIR}/fftw${PREC_SUFFIX}.pc
412
         DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
413
         COMPONENT Development)
414

  
415
# cmake file
416
set (FFTW3_LIBRARIES "FFTW3::${fftw3_lib}")
417
configure_file (FFTW3Config.cmake.in FFTW3${PREC_SUFFIX}Config.cmake @ONLY)
418
configure_file (FFTW3ConfigVersion.cmake.in FFTW3${PREC_SUFFIX}ConfigVersion.cmake @ONLY)
419
install (FILES
420
          ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}Config.cmake
421
          ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}ConfigVersion.cmake
422
	  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX}
423
         COMPONENT Development)
424

  
425
export (TARGETS ${fftw3_lib} NAMESPACE FFTW3:: FILE ${PROJECT_BINARY_DIR}/FFTW3LibraryDepends.cmake)
426
install(EXPORT FFTW3LibraryDepends
427
        NAMESPACE FFTW3::
428
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX}
429
        COMPONENT Development)
src/fftw-3.3.8/CONVENTIONS
1
Code conventions used internally by fftw3 (not in API):
2

  
3
LEARN FROM THE MASTERS: read Ken Thompson's C compiler in Plan 9.
4
   Avoid learning from C++/Java programs.
5

  
6
INDENTATION: K&R, 5 spaces/tab.  In case of doubt, indent -kr -i5.
7

  
8
NAMES: keep them short.  Shorter than you think.  The Bible was written
9
   without vowels.  Don't outsmart the Bible.
10

  
11
   Common names:
12

  
13
   R       : real type, aka fftw_real
14
   E       : real type for local variables (possibly extra precision)
15
   C       : complex type
16
   sz      : size
17
   vecsz   : vector size
18
   is, os  : input/output stride
19
   ri, ii  : real/imag input (complex data)
20
   ro, io  : real/imag output (complex data)
21
   I, O    : real input/output (real data)
22
   A       : assert
23
   CK      : check
24
   S       : solver, defined internally to each solver file
25
   P       : plan, defined internally to each solver file
26
   k       : codelet
27
   X(...)  : used for mangling of external names (see below)
28
   K(...)  : floating-point constant, in E precision
29

  
30
   If a name is used often and must have the form fftw_foo to avoid
31
   namespace pollution, #define FOO fftw_foo and use the short name.
32

  
33
   Leave that hungarian crap to MS.  foo_t counts as hungarian: use
34
   foo instead.  foo is lowercase so that it does not look like a DOS
35
   program. Exception: typedef struct foo_s {...} foo;  instead of
36
   typedef struct foo {...} foo;  for C++ compatibility.
37

  
38
NAME MANGLING: use X(foo) for external names instead of fftw_foo.
39
    X(foo) expands to fftwf_foo or fftw_foo, depending on the
40
    precision.  (Unfortunately, this is an ugly form of hungarian
41
    notation.  Grrr...)  Names that are not exported do not need to be
42
    mangled.
43

  
44
REPEATED CODE: favor a table.  E.g., do not write
45

  
46
    foo("xxx", 1);
47
    foo("yyy", 2);
48
    foo("zzz", -1);
49

  
50
    Instead write
51

  
52
      struct { const char *nam; int arg; } footab[] = {
53
	{ "xxx", 1 },
54
	{ "yyy", 2 },
55
	{ "zzz", -1 }
56
      };
57

  
58
    and loop over footab.  Rationale: it saves code space.
59
    Similarly, replace a switch statement with a table whenever
60
    possible.
61

  
62
C++: The code should compile as a C++ program. Run the code through
63
    gcc -xc++ .  The extra C++ restrictions are unnecessary, of
64
    course, but this will save us from a flood of complaints when
65
    we release the code.
src/fftw-3.3.8/COPYING
1
		    GNU GENERAL PUBLIC LICENSE
2
		       Version 2, June 1991
3

  
4
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
5
     51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
6
 Everyone is permitted to copy and distribute verbatim copies
7
 of this license document, but changing it is not allowed.
8

  
9
			    Preamble
10

  
11
  The licenses for most software are designed to take away your
12
freedom to share and change it.  By contrast, the GNU General Public
13
License is intended to guarantee your freedom to share and change free
14
software--to make sure the software is free for all its users.  This
15
General Public License applies to most of the Free Software
16
Foundation's software and to any other program whose authors commit to
17
using it.  (Some other Free Software Foundation software is covered by
18
the GNU Library General Public License instead.)  You can apply it to
19
your programs, too.
20

  
21
  When we speak of free software, we are referring to freedom, not
22
price.  Our General Public Licenses are designed to make sure that you
23
have the freedom to distribute copies of free software (and charge for
24
this service if you wish), that you receive source code or can get it
25
if you want it, that you can change the software or use pieces of it
26
in new free programs; and that you know you can do these things.
27

  
28
  To protect your rights, we need to make restrictions that forbid
29
anyone to deny you these rights or to ask you to surrender the rights.
30
These restrictions translate to certain responsibilities for you if you
31
distribute copies of the software, or if you modify it.
32

  
33
  For example, if you distribute copies of such a program, whether
34
gratis or for a fee, you must give the recipients all the rights that
35
you have.  You must make sure that they, too, receive or can get the
36
source code.  And you must show them these terms so they know their
37
rights.
38

  
39
  We protect your rights with two steps: (1) copyright the software, and
40
(2) offer you this license which gives you legal permission to copy,
41
distribute and/or modify the software.
42

  
43
  Also, for each author's protection and ours, we want to make certain
44
that everyone understands that there is no warranty for this free
45
software.  If the software is modified by someone else and passed on, we
46
want its recipients to know that what they have is not the original, so
47
that any problems introduced by others will not reflect on the original
48
authors' reputations.
49

  
50
  Finally, any free program is threatened constantly by software
51
patents.  We wish to avoid the danger that redistributors of a free
52
program will individually obtain patent licenses, in effect making the
53
program proprietary.  To prevent this, we have made it clear that any
54
patent must be licensed for everyone's free use or not licensed at all.
55

  
56
  The precise terms and conditions for copying, distribution and
57
modification follow.
58

59
		    GNU GENERAL PUBLIC LICENSE
60
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61

  
62
  0. This License applies to any program or other work which contains
63
a notice placed by the copyright holder saying it may be distributed
64
under the terms of this General Public License.  The "Program", below,
65
refers to any such program or work, and a "work based on the Program"
66
means either the Program or any derivative work under copyright law:
67
that is to say, a work containing the Program or a portion of it,
68
either verbatim or with modifications and/or translated into another
69
language.  (Hereinafter, translation is included without limitation in
70
the term "modification".)  Each licensee is addressed as "you".
71

  
72
Activities other than copying, distribution and modification are not
73
covered by this License; they are outside its scope.  The act of
74
running the Program is not restricted, and the output from the Program
75
is covered only if its contents constitute a work based on the
76
Program (independent of having been made by running the Program).
77
Whether that is true depends on what the Program does.
78

  
79
  1. You may copy and distribute verbatim copies of the Program's
80
source code as you receive it, in any medium, provided that you
81
conspicuously and appropriately publish on each copy an appropriate
82
copyright notice and disclaimer of warranty; keep intact all the
83
notices that refer to this License and to the absence of any warranty;
84
and give any other recipients of the Program a copy of this License
85
along with the Program.
86

  
87
You may charge a fee for the physical act of transferring a copy, and
88
you may at your option offer warranty protection in exchange for a fee.
89

  
90
  2. You may modify your copy or copies of the Program or any portion
91
of it, thus forming a work based on the Program, and copy and
92
distribute such modifications or work under the terms of Section 1
93
above, provided that you also meet all of these conditions:
94

  
95
    a) You must cause the modified files to carry prominent notices
96
    stating that you changed the files and the date of any change.
97

  
98
    b) You must cause any work that you distribute or publish, that in
99
    whole or in part contains or is derived from the Program or any
100
    part thereof, to be licensed as a whole at no charge to all third
101
    parties under the terms of this License.
102

  
103
    c) If the modified program normally reads commands interactively
104
    when run, you must cause it, when started running for such
105
    interactive use in the most ordinary way, to print or display an
106
    announcement including an appropriate copyright notice and a
107
    notice that there is no warranty (or else, saying that you provide
108
    a warranty) and that users may redistribute the program under
109
    these conditions, and telling the user how to view a copy of this
110
    License.  (Exception: if the Program itself is interactive but
111
    does not normally print such an announcement, your work based on
112
    the Program is not required to print an announcement.)
113

114
These requirements apply to the modified work as a whole.  If
115
identifiable sections of that work are not derived from the Program,
116
and can be reasonably considered independent and separate works in
117
themselves, then this License, and its terms, do not apply to those
118
sections when you distribute them as separate works.  But when you
119
distribute the same sections as part of a whole which is a work based
120
on the Program, the distribution of the whole must be on the terms of
121
this License, whose permissions for other licensees extend to the
122
entire whole, and thus to each and every part regardless of who wrote it.
123

  
124
Thus, it is not the intent of this section to claim rights or contest
125
your rights to work written entirely by you; rather, the intent is to
126
exercise the right to control the distribution of derivative or
127
collective works based on the Program.
128

  
129
In addition, mere aggregation of another work not based on the Program
130
with the Program (or with a work based on the Program) on a volume of
131
a storage or distribution medium does not bring the other work under
132
the scope of this License.
133

  
134
  3. You may copy and distribute the Program (or a work based on it,
135
under Section 2) in object code or executable form under the terms of
136
Sections 1 and 2 above provided that you also do one of the following:
137

  
138
    a) Accompany it with the complete corresponding machine-readable
139
    source code, which must be distributed under the terms of Sections
140
    1 and 2 above on a medium customarily used for software interchange; or,
141

  
142
    b) Accompany it with a written offer, valid for at least three
143
    years, to give any third party, for a charge no more than your
144
    cost of physically performing source distribution, a complete
145
    machine-readable copy of the corresponding source code, to be
146
    distributed under the terms of Sections 1 and 2 above on a medium
147
    customarily used for software interchange; or,
148

  
149
    c) Accompany it with the information you received as to the offer
150
    to distribute corresponding source code.  (This alternative is
151
    allowed only for noncommercial distribution and only if you
152
    received the program in object code or executable form with such
153
    an offer, in accord with Subsection b above.)
154

  
155
The source code for a work means the preferred form of the work for
156
making modifications to it.  For an executable work, complete source
157
code means all the source code for all modules it contains, plus any
158
associated interface definition files, plus the scripts used to
159
control compilation and installation of the executable.  However, as a
160
special exception, the source code distributed need not include
161
anything that is normally distributed (in either source or binary
162
form) with the major components (compiler, kernel, and so on) of the
163
operating system on which the executable runs, unless that component
164
itself accompanies the executable.
165

  
166
If distribution of executable or object code is made by offering
167
access to copy from a designated place, then offering equivalent
168
access to copy the source code from the same place counts as
169
distribution of the source code, even though third parties are not
170
compelled to copy the source along with the object code.
171

172
  4. You may not copy, modify, sublicense, or distribute the Program
173
except as expressly provided under this License.  Any attempt
174
otherwise to copy, modify, sublicense or distribute the Program is
175
void, and will automatically terminate your rights under this License.
176
However, parties who have received copies, or rights, from you under
177
this License will not have their licenses terminated so long as such
178
parties remain in full compliance.
179

  
180
  5. You are not required to accept this License, since you have not
181
signed it.  However, nothing else grants you permission to modify or
182
distribute the Program or its derivative works.  These actions are
183
prohibited by law if you do not accept this License.  Therefore, by
184
modifying or distributing the Program (or any work based on the
185
Program), you indicate your acceptance of this License to do so, and
186
all its terms and conditions for copying, distributing or modifying
187
the Program or works based on it.
188

  
189
  6. Each time you redistribute the Program (or any work based on the
190
Program), the recipient automatically receives a license from the
191
original licensor to copy, distribute or modify the Program subject to
192
these terms and conditions.  You may not impose any further
193
restrictions on the recipients' exercise of the rights granted herein.
194
You are not responsible for enforcing compliance by third parties to
195
this License.
196

  
197
  7. If, as a consequence of a court judgment or allegation of patent
198
infringement or for any other reason (not limited to patent issues),
199
conditions are imposed on you (whether by court order, agreement or
200
otherwise) that contradict the conditions of this License, they do not
201
excuse you from the conditions of this License.  If you cannot
202
distribute so as to satisfy simultaneously your obligations under this
203
License and any other pertinent obligations, then as a consequence you
204
may not distribute the Program at all.  For example, if a patent
205
license would not permit royalty-free redistribution of the Program by
206
all those who receive copies directly or indirectly through you, then
207
the only way you could satisfy both it and this License would be to
208
refrain entirely from distribution of the Program.
209

  
210
If any portion of this section is held invalid or unenforceable under
211
any particular circumstance, the balance of the section is intended to
212
apply and the section as a whole is intended to apply in other
213
circumstances.
214

  
215
It is not the purpose of this section to induce you to infringe any
216
patents or other property right claims or to contest validity of any
217
such claims; this section has the sole purpose of protecting the
218
integrity of the free software distribution system, which is
219
implemented by public license practices.  Many people have made
220
generous contributions to the wide range of software distributed
221
through that system in reliance on consistent application of that
222
system; it is up to the author/donor to decide if he or she is willing
223
to distribute software through any other system and a licensee cannot
224
impose that choice.
225

  
226
This section is intended to make thoroughly clear what is believed to
227
be a consequence of the rest of this License.
228

229
  8. If the distribution and/or use of the Program is restricted in
230
certain countries either by patents or by copyrighted interfaces, the
231
original copyright holder who places the Program under this License
232
may add an explicit geographical distribution limitation excluding
233
those countries, so that distribution is permitted only in or among
234
countries not thus excluded.  In such case, this License incorporates
235
the limitation as if written in the body of this License.
236

  
237
  9. The Free Software Foundation may publish revised and/or new versions
238
of the General Public License from time to time.  Such new versions will
239
be similar in spirit to the present version, but may differ in detail to
240
address new problems or concerns.
241

  
242
Each version is given a distinguishing version number.  If the Program
243
specifies a version number of this License which applies to it and "any
244
later version", you have the option of following the terms and conditions
245
either of that version or of any later version published by the Free
246
Software Foundation.  If the Program does not specify a version number of
247
this License, you may choose any version ever published by the Free Software
248
Foundation.
249

  
250
  10. If you wish to incorporate parts of the Program into other free
251
programs whose distribution conditions are different, write to the author
252
to ask for permission.  For software which is copyrighted by the Free
253
Software Foundation, write to the Free Software Foundation; we sometimes
254
make exceptions for this.  Our decision will be guided by the two goals
255
of preserving the free status of all derivatives of our free software and
256
of promoting the sharing and reuse of software generally.
257

  
258
			    NO WARRANTY
259

  
260
  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
REPAIR OR CORRECTION.
269

  
270
  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
POSSIBILITY OF SUCH DAMAGES.
279

  
280
		     END OF TERMS AND CONDITIONS
281

282
	    How to Apply These Terms to Your New Programs
283

  
284
  If you develop a new program, and you want it to be of the greatest
285
possible use to the public, the best way to achieve this is to make it
286
free software which everyone can redistribute and change under these terms.
287

  
288
  To do so, attach the following notices to the program.  It is safest
289
to attach them to the start of each source file to most effectively
290
convey the exclusion of warranty; and each file should have at least
291
the "copyright" line and a pointer to where the full notice is found.
292

  
293
    <one line to give the program's name and a brief idea of what it does.>
294
    Copyright (C) <year>  <name of author>
295

  
296
    This program is free software; you can redistribute it and/or modify
297
    it under the terms of the GNU General Public License as published by
298
    the Free Software Foundation; either version 2 of the License, or
299
    (at your option) any later version.
300

  
301
    This program is distributed in the hope that it will be useful,
302
    but WITHOUT ANY WARRANTY; without even the implied warranty of
303
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304
    GNU General Public License for more details.
305

  
306
    You should have received a copy of the GNU General Public License
307
    along with this program; if not, write to the Free Software
308
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
309

  
310

  
311
Also add information on how to contact you by electronic and paper mail.
312

  
313
If the program is interactive, make it output a short notice like this
314
when it starts in an interactive mode:
315

  
316
    Gnomovision version 69, Copyright (C) year  name of author
317
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
318
    This is free software, and you are welcome to redistribute it
319
    under certain conditions; type `show c' for details.
320

  
321
The hypothetical commands `show w' and `show c' should show the appropriate
322
parts of the General Public License.  Of course, the commands you use may
323
be called something other than `show w' and `show c'; they could even be
324
mouse-clicks or menu items--whatever suits your program.
325

  
326
You should also get your employer (if you work as a programmer) or your
327
school, if any, to sign a "copyright disclaimer" for the program, if
328
necessary.  Here is a sample; alter the names:
329

  
330
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
331
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
332

  
333
  <signature of Ty Coon>, 1 April 1989
334
  Ty Coon, President of Vice
335

  
336
This General Public License does not permit incorporating your program into
337
proprietary programs.  If your program is a subroutine library, you may
338
consider it more useful to permit linking proprietary applications with the
339
library.  If this is what you want to do, use the GNU Library General
340
Public License instead of this License.
src/fftw-3.3.8/COPYRIGHT
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
src/fftw-3.3.8/ChangeLog
1
commit 700745cdbb34e964e1abda86183809fd8dd95796
2
Author: Matteo Frigo <athena@fftw.org>
3
Date:   Thu May 24 08:00:45 2018 -0400
4

  
5
    Bump FFTW_MINOR_VERSION for fftw-3.3.8
6

  
7
commit 902d0982522cdf6f0acd60f01f59203824e8e6f3
8
Author: Matteo Frigo <athena@fftw.org>
9
Date:   Thu May 24 07:43:02 2018 -0400
10

  
11
    update NEWS
12

  
13
commit 41b0d9eff394891ba3327b9062811d48677bb411
14
Author: Matteo Frigo <athena@fftw.org>
15
Date:   Thu May 24 07:35:36 2018 -0400
16

  
17
    CFLAGS: don't use -ffast-math
18
    
19
    -ffast-math is a relic from 1999 when it was kind of necessary for
20
    full use of FMA on powerpc.  Nowadays it is just a liability.  For
21
    example, 'gcc-8 -ffast-math' ignores the distinction between +0 and
22
    -0, thus breaking the avx and avx2 implementations in fftw-3.3.7.
23

  
24
commit 19eeeca592f63413698f23dd02b9961f22581803
25
Author: Matteo Frigo <athena@fftw.org>
26
Date:   Thu May 24 07:29:00 2018 -0400
27

  
28
    Fixes for gcc-8
29
    
30
    It looks like 'gcc-8 -ffast-math' does not honor the distinction between
31
    +0.0 and -0.0 in floating-point constants.  I suppose that technically
32
    -ffast-math has the right to do so.
33
    
34
    For good measure, this patch encodes such constants as their explicit
35
    binary representation.  A separate patch will disable -ffast-math.
36

  
37
commit bf478afbf2367df0f38c77f31d1f912aeeb82585
38
Author: Miklos Espak <miklos.espak@motilent.co.uk>
39
Date:   Thu Apr 26 18:31:57 2018 +0100
40

  
41
    Define include directory for installed targets (#141)
42

  
43
commit ab888adf510338c03ea8ac49b4aab91fb57f1479
44
Author: Steven G. Johnson <stevenj@mit.edu>
45
Date:   Sat Apr 14 11:40:39 2018 -0400
46

  
47
    don't need both identifier and name fields
48

  
49
commit 2b999c600c58c78b8acb78c3352b02d9df6f6e60
50
Author: Steven G. Johnson <stevenj@alum.mit.edu>
51
Date:   Fri Apr 13 08:43:35 2018 -0400
52

  
53
    JSON doesn't like trailing commas
54

  
55
commit 92eee8bbc4252c871aa870d2dce88eb98d0c7d18
56
Author: Steven G. Johnson <stevenj@alum.mit.edu>
57
Date:   Fri Apr 13 08:38:50 2018 -0400
58

  
59
    list both C and OCaml (as explained in codemeta/codemeta#181)
60

  
61
commit 35e5609f17e212bf1c40da9b2ebe66784ad37052
62
Author: Steven G. Johnson <stevenj@alum.mit.edu>
63
Date:   Thu Apr 12 12:01:15 2018 -0400
64

  
65
    add codemeta file
66

  
67
commit eba07c46b5d2f7824d293ab59aa5c29a25034963
68
Author: Matteo Frigo <athena@fftw.org>
69
Date:   Mon Feb 19 09:30:29 2018 -0500
70

  
71
    Call _mm256_zeroupper() when leaving avx512 code
72
    
73
    Carsten Steger says:
74
    
75
       simd-avx512.h defines VLEAVE as nothing in FFTW 3.3.7.  However, the
76
       current Intel® 64 and IA-32 Architectures Optimization Reference Manual,
77
       chapter 15.18, recommends the following:
78
       - When you have to mix group B instructions with Intel SSE instructions,
79
         or you suspect that such a mixture might occur, use the VZEROUPPER
80
         instruction whenever a transition is expected.
81
       - Add VZEROUPPER after group B instructions were executed and before any
82
         function call that might lead to Intel SSE instruction execution.
83
       - Add VZEROUPPER at the end of any function that uses group B instructions.
84
       - Add VZEROUPPER before thread creation if not already in a clean state
85
         so that the thread does not inherit Dirty Upper State.
86
       (Group B are instruction types that modify bits 128-511 of vector
87
       registers 0-15.)
88
    
89
       Therefore, I believe it would be prudent to define VLEAVE as
90
       _mm256_zeroupper in simd-avx512.h (see the attached patch).
91
    
92
    At https://software.intel.com/en-us/forums/intel-isa-extensions/topic/704023
93
    Mark Charney says:
94
    
95
       To be clear, we very much still recommend using VZEROUPPER on
96
       Skylake. Even though it does not have the same penalties as earlier
97
       designs in that family for mixing AVX and SSE code, we definitely
98
       recommend using VZEROUPPER on Skylake.
99
    
100
       Yes it would obviously be better if there were one solution.  For
101
       code that has to run on both families, the "common code" solution
102
       is to use the Xeon guidelines.
103
    
104
    If Mark Charney recommends VZEROUPPER, that's good enough for me.
105

  
106
commit b267008613d082975b108252ed596ba0916ffa31
107
Author: Matteo Frigo <athena@fftw.org>
108
Date:   Wed Nov 22 12:54:18 2017 -0500
109

  
110
    fftw3-mpi.f03 should be regenerated when Makefile changes
111

  
112
commit 708b202fd593cf1002cf97dce0863e2a438e3720
113
Merge: 2e0cfdda 8ba34c40
114
Author: Matteo Frigo <athena@fftw.org>
115
Date:   Mon Nov 20 09:37:17 2017 -0500
116

  
117
    Merge pull request #113 from xantares/mingw
118
    
119
    CMake enhancements
120

  
121
commit 2e0cfddacacccc8a1e6e679c5e3fa81fb0219bda
122
Author: Matteo Frigo <athena@fftw.org>
123
Date:   Mon Nov 20 07:07:30 2017 -0500
124

  
125
    Attempt to strengthen language in README.md
126

  
127
commit 8ba34c40fef38f661c9c413781990a7c021ba22b
128
Author: Michel Zou <xantares09@hotmail.com>
129
Date:   Thu Nov 9 22:33:51 2017 +0100
130

  
131
    Preliminary Fortran support
132

  
133
commit bd753a7679ecca2799640e7c8ced6f1f784f1b51
134
Author: Michel Zou <xantares09@hotmail.com>
135
Date:   Mon Nov 6 23:00:29 2017 +0100
136

  
137
    CMake MinGW fixes
138
    
139
    Mostly fixes the SSE2 macro in config.h, otherwise minor detection fixes
140

  
141
commit da5372a175bcb09578359960869c76da74c9fda3
142
Author: Matteo Frigo <athena@fftw.org>
143
Date:   Tue Oct 31 20:21:17 2017 -0400
144

  
145
    EXTRA_DIST += README-perfcnt.md
146

  
147
commit 1b64d9269254e9d0a0f0b088e5eceb0db92d531f
148
Merge: b5ccc557 2be183c3
149
Author: Matteo Frigo <athena@fftw.org>
150
Date:   Tue Oct 31 20:19:13 2017 -0400
151

  
152
    Merge pull request #112 from alexeicolin/PR--armv7-pmccntr-counter-and-docs
153
    
154
    Pr  armv7 pmccntr counter and docs
155

  
156
commit 2be183c3a44d58aaa11909ba8882310fb44d598c
157
Author: Alexei Colin <ac@alexeicolin.com>
158
Date:   Tue Oct 31 23:34:38 2017 +0000
159

  
160
    perf counters: name ARMv8 PMCCNTR_EL0 explicitly
161
    
162
    For consistency with the rest.
163

  
164
commit 504ece7f8ffc60c2a03b28d977e9825230052d48
165
Author: Alexei Colin <ac@alexeicolin.com>
166
Date:   Tue Oct 31 23:28:48 2017 +0000
167

  
168
    perf counters: add PMCCNTR for ARMv7 and add docs
169
    
170
    The existing armv7 counter (CNTVCT) does need enabling from kernel mode (so
171
    updated the configure help), and the enable bit is different from the PMU
172
    enable bit (described in the new docs).
173
    
174
    Tested on XU4: printed the returned counter values and they look reasonable.
175

  
176
commit b5ccc557fd2e57bfc955f0db9b5182e92f9cb55c
177
Author: Matteo Frigo <athena@fftw.org>
178
Date:   Sun Oct 29 08:13:04 2017 -0400
179

  
180
    fftw3-mpi.h should include <fftw3.h>, not "fftw3.h"
181

  
182
commit 9e3f8da20e65f1e34e677768e550086b06d77f16
183
Author: Matteo Frigo <athena@fftw.org>
184
Date:   Sun Oct 29 08:09:35 2017 -0400
185

  
186
    NEWS: warn that cmake support is experimental and not well tested
187

  
188
commit 9616fb9ff1c2694f5cfa2c4a59efa96094ae6812
189
Author: Matteo Frigo <athena@fftw.org>
190
Date:   Sun Oct 29 07:48:43 2017 -0400
191

  
192
    Update NEWS for upcoming fftw-3.3.7
193

  
194
commit 62edb203fc09c8c8ac2c2d5ac3299ea8d4dc7838
195
Author: Matteo Frigo <athena@fftw.org>
196
Date:   Tue Oct 10 18:58:37 2017 -0400
197

  
198
    Ditch --enable-debug-malloc and --enable-debug-alignment
199
    
200
    We wrote DEBUG_MALLOC in 1997 to debug memory leaks.  Nowadays
201
    DEBUG_MALLOC is just confusing.  Better tools are available, and
202
    DEBUG_MALLOC is not thread-safe and it does not respect SIMD
203
    alignment.  It confused at least one user.
204
    
205
    In the gcc-2.SOMETHING days, gcc would allocate doubles on the stack
206
    at 4-byte boundary (vs. 8) reducing performance by a factor of 3.
207
    That's when we introduced --enable-debug-alignment, which is totally
208
    obsolete by now.
209

  
210
commit 6ed4297e85e5ef24a18ce428b18e020d8e48413a
211
Author: Matteo Frigo <athena@fftw.org>
212
Date:   Fri Sep 29 19:27:43 2017 -0400
213

  
214
    Use armv7a cycle counter unconditionally if HAVE_ARMV7A_CNTVCT
215
    
216
    It looks like __ARM_ARCH_7A__ is not always defined.  If the
217
    user says HAVE_ARMV7A_CNTVCT, trust the user.
218

  
219
commit 2dd77382319ceb99c32b38418716783eec8adad4
220
Merge: 04590cb1 e09ab8ca
221
Author: Matteo Frigo <athena@fftw.org>
222
Date:   Thu Sep 21 22:42:38 2017 -0400
223

  
224
    Merge pull request #110 from junghans/cmake
225
    
226
    Minor cmake fixes
227

  
228
commit e09ab8cac98c0f206968bbd962a6f76cf26e7437
229
Merge: 890dac59 76427f30
230
Author: Christoph Junghans <junghans@votca.org>
231
Date:   Thu Sep 21 16:13:43 2017 -0600
232

  
233
    Merge commit 'refs/pull/109/head' of github.com:FFTW/fftw3 into cmake
234

  
235
commit 04590cb11baa11bbfdebe101fa90186bbf48423c
236
Author: Matteo Frigo <athena@fftw.org>
237
Date:   Thu Sep 21 18:00:58 2017 -0400
238

  
239
    simd-vsx.h: don't use vpermxor
240
    
241
    It seems like gcc-6 generates incorrect code when using vpermxor
242
    (tested with qemu emulator, so there is a chance that gcc is right and
243
    qemu is wrong).  Disable the use of vpermxor and do the simple thing
244
    (one multiplication + one permutation).
245

  
246
commit 76427f30080e2cab3ca5047193ce8ffe6110f047
247
Author: Michel Zou <xantares09@hotmail.com>
248
Date:   Thu Sep 21 23:44:15 2017 +0200
249

  
250
    No need to list includes
251

  
252
commit e47e9a81c41454e5e128cd68505b38152ad60500
253
Author: Matteo Frigo <athena@fftw.org>
254
Date:   Thu Sep 21 17:13:14 2017 -0400
255

  
256
    Remove AC_FUNC_{MALLOC,REALLOC,MMAP}
257
    
258
    They don't do what I thought.  E.g., AC_FUNC_MALLOC checks that
259
    malloc(0) returns non-NULL, and defines malloc to be rpl_malloc otherwise.
260
    We don't support rpl_malloc() and we don't care about malloc(0).
261

  
262
commit 5aebc02ff30af12d2dc3be6c762e821a38f56595
263
Author: Matteo Frigo <athena@fftw.org>
264
Date:   Thu Sep 21 10:09:02 2017 -0400
265

  
266
    Dead-Code Police
267

  
268
commit d97394a17250d71d6a722ae64dcc3123130cf08f
269
Author: Matteo Frigo <athena@fftw.org>
270
Date:   Thu Sep 21 09:54:36 2017 -0400
271

  
272
    Fixup fftw3-mpi.h
273
    
274
    fftw3-mpi.h must include "fftw3.h", not "api/fftw3.h", because both
275
    fftw3-mpi.h and fftw3.h will ultimately be installed in /usr/include.
276
    
277
    Thus, as a special exception, mpi/Makefile.am must specify the include
278
    path -I $(top_srcdir)/api.
279

  
280
commit 890dac59aca4c153e7e22add0a8de00766227670
281
Merge: 4ebda892 106582aa
282
Author: Christoph Junghans <junghans@votca.org>
283
Date:   Wed Sep 20 14:44:04 2017 -0600
284

  
285
    Merge commit 'refs/pull/109/head' of github.com:FFTW/fftw3 into cmake
286

  
287
commit 4ebda89297b6b38632c3d91bd5a673a1bee4ffff
288
Author: Christoph Junghans <junghans@votca.org>
289
Date:   Wed Sep 20 14:05:13 2017 -0600
290

  
291
    autotools: fix install of FFTW3ConfigVersion.cmake
292

  
293
commit e9a66d5f748037f9cb9c0f5b8d824d73c0425042
294
Author: Christoph Junghans <junghans@votca.org>
295
Date:   Wed Sep 20 13:29:29 2017 -0600
296

  
297
    cmake: use GNUInstallDirs
298

  
299
commit 4fbb72ad294e2070d64a83b24f89a601d4f624c6
300
Author: Matteo Frigo <athena@fftw.org>
301
Date:   Wed Sep 20 13:11:55 2017 -0400
302

  
303
    Generate codlist.c only when MAINTAINER_MODE
304
    
305
    The user is not supposed to regenerate .c files.  In addition, the
306
    generation rule is subtly nonportable (it depends on whether or not
307
    '#' can be escaped in Makefiles, an issue that does not appear
308
    settled.)
309

  
310
commit f243f8ce48be61952527d43da222096296fdd2f9
311
Author: Matteo Frigo <athena@fftw.org>
312
Date:   Wed Sep 20 11:54:13 2017 -0400
313

  
314
    Generate {dft,rdft}/simd/{sse,sse2,avx,...}/*.c only when MAINTAINER_MODE
315
    
316
    Users are not supposed to generate them.  Apart from that, the
317
    generation rule uses '$*' in an explicit make rule, which is
318
    technically a GNU extension.  (Works with {open,free}bsd, but breaks
319
    Solaris.)
320

  
321
commit 106582aa8f97257f53730cbac81f98e8659b084c
322
Author: Michel Zou <xantares09@hotmail.com>
323
Date:   Wed Sep 20 15:46:51 2017 +0200
324

  
325
    Fix includes, export target
326

  
327
commit 1a24e67165ba56447f814bcdc12b9d6e083f1670
328
Author: Matteo Frigo <athena@fftw.org>
329
Date:   Wed Sep 20 07:24:58 2017 -0400
330

  
331
    Restore the ability to build out of tree.
332
    
333
    Before 1f3704b9, we had "-I $(top_srcdir)/foo -I $(top_srcdir)/bar".
334
    After 1f3704b9, we had no -I specification at all, but automake wants
335
    an explicit -I $(top_srcdir) in order to build out of tree.
336

  
337
commit 919b795940d1e86a948a4430193dbd0853f47272
338
Merge: 6076339a f7a64365
339
Author: Matteo Frigo <athena@fftw.org>
340
Date:   Wed Sep 20 06:41:50 2017 -0400
341

  
342
    Merge pull request #107 from xantares/config-mode
343
    
344
    Config mode
345

  
346
commit f7a6436509d324297783eb77df54010320b062f8
347
Author: Michel Zou <xantares09@hotmail.com>
348
Date:   Wed Sep 20 11:46:05 2017 +0200
349

  
350
    Build bench according to BUILD_TESTS
351

  
352
commit 82cec28b7e14280ad11878978e23a3680bb0e983
353
Author: Michel Zou <xantares09@hotmail.com>
354
Date:   Wed Sep 20 11:41:20 2017 +0200
355

  
356
    Use cmake config mode
357
    
358
    Installs FFTW3Config.cmake instead of a FindFFTW3.cmake
359
    Also configures the pkgconfig file from cmake
360

  
361
commit 6076339a342b12b0d0cfd9f6d967bfa9fbf6b1b2
362
Author: Matteo Frigo <athena@fftw.org>
363
Date:   Tue Sep 19 23:38:27 2017 -0400
364

  
365
    Fix performance regression with gcc-3.3
366

  
367
commit f4c37657cb32b2552c5e86f0540c0308d4f451ef
368
Author: Matteo Frigo <athena@fftw.org>
369
Date:   Tue Sep 19 23:24:08 2017 -0400
370

  
371
    get rid of the sse2-nonportable.c hack
372
    
373
    It was necessary to support some broken compiler 15 years ago.
374
    Remove it and see if anybody complains.
375

  
376
commit 362ae5c7b8a9df76b5ec0de4433131db33bae0ae
377
Author: Matteo Frigo <athena@fftw.org>
378
Date:   Tue Sep 19 21:44:13 2017 -0400
379

  
380
    configure.ac Police
381
    
382
    Remove some obsolete AC_CHECK_HEADERS, add new checks suggested by
383
    autoscan.
384

  
385
commit a56b5b4b149e56fce43778172a56f77d30352833
386
Author: Matteo Frigo <athena@fftw.org>
387
Date:   Tue Sep 19 21:43:45 2017 -0400
388

  
389
    Include Police
390
    
391
    fftw-wisdom.c was including <fftw3.h> instead of "api/fftw3.h"
392

  
393
commit 1f3704b9eff4b7e80ef7d775fb13f5bb8de0a5f1
394
Author: Matteo Frigo <athena@fftw.org>
395
Date:   Tue Sep 19 21:12:22 2017 -0400
396

  
397
    Do not set include path ("-I") in Makefile.am
398
    
399
    .[ch] files should specify their own paths explicitly.  Setting paths
400
    in the Makefile was always a bad idea, but it is totally untenable if
401
    we are supporting cmake.
402

  
403
commit 6e0ae04bad14a7dd9b4928f22d7a01e887dfdc03
404
Author: Matteo Frigo <athena@fftw.org>
405
Date:   Tue Sep 19 19:31:55 2017 -0400
406

  
407
    Fix OpenBSD build
408
    
409
    Using $< in a non-suffix rule context is a GNUmake idiom and OpenBSD
410
    doesn't like it.
411

  
412
commit 31a53789197f90d6bf349dd230ab86023e5fb83c
413
Author: Matteo Frigo <athena@fftw.org>
414
Date:   Tue Sep 19 19:24:34 2017 -0400
415

  
416
    EXTRA_DIST += FindFFTW3.cmake.in
417

  
418
commit ae1a764ce88166e8e1f05a25888f105ec8f1939d
419
Merge: 5fdca1d9 97b273d8
420
Author: Matteo Frigo <athena@fftw.org>
421
Date:   Tue Sep 19 17:13:58 2017 -0400
422

  
423
    Merge pull request #69 from junghans/cmake
424
    
425
    Build and install cmake module
426

  
427
commit 5fdca1d9b0a0b2e6491c98f63873dcf600355e09
428
Merge: b521e530 66506470
429
Author: Matteo Frigo <athena@fftw.org>
430
Date:   Tue Sep 19 15:57:59 2017 -0400
431

  
432
    Merge pull request #92 from tklauser/armv7a-cycle-counter
433
    
434
    Fix ARMV7-A cycle counter detection
435

  
436
commit b521e5305a7317c1c0f1d454beb6580eaf4de1db
437
Author: Matteo Frigo <athena@fftw.org>
438
Date:   Tue Sep 19 15:51:03 2017 -0400
439

  
440
    cmake: don't check for dlfcn.h
441
    
442
    We don't use it
443

  
444
commit fc852fcdfa80fab30eac2284249686853efa2e4b
445
Author: Matteo Frigo <athena@fftw.org>
446
Date:   Tue Sep 19 15:43:02 2017 -0400
447

  
448
    Remove ancient paranoia
449
    
450
    In the '90s we used to run autoconf three times, just in case
451
    (because it really didn't work the first time).  "Three" was modeled
452
    after the "sync; sync; sync; reboot" incantation of the '80s.
453
    
454
    Hopefully we are past this by now.
455

  
456
commit 34738e7f669882c6abc12c2744c8acc347c91719
457
Author: Matteo Frigo <athena@fftw.org>
458
Date:   Tue Sep 19 15:32:39 2017 -0400
459

  
460
    Flip boolean in a way that makes more sense to me
461

  
462
commit a2bfd859d9ad08490d02252d8a80c5994dd82747
463
Author: Matteo Frigo <athena@fftw.org>
464
Date:   Tue Sep 19 15:28:56 2017 -0400
465

  
466
    Various CMakeLists.txt fixes
467
    
468
    * AVX2 codelets require -mfma
469
    
470
    * --enable-avx2 automatically enables the 128-bit avx2 codelets in
471
      *dft/simd/avx2-128
472
    
473
    * bump FFTW_VERSION to 3.3.7, SOVERSION to 3.5.7
474
    
475
    * build bench always, irrespective of Threads_FOUND
476

  
477
commit 93ac6e1075e73c0275a9e0006fe9161c3b6fae38
478
Merge: a71f3dd3 d3a8d13f
479
Author: Matteo Frigo <athena@fftw.org>
480
Date:   Tue Sep 19 14:31:03 2017 -0400
481

  
482
    Merge pull request #103 from xantares/cmake
483
    
484
    Add user cmake support
485
    
486
    Still needs work, but let's move forward and move this contribution into the official repository
487

  
488
commit d3a8d13f74361a7ffc4c48c229181a86b35e9a7d
489
Author: Michel Zou <xantares09@hotmail.com>
490
Date:   Tue Jul 18 12:16:43 2017 +0200
491

  
492
    Add user cmake infrastructure
493

  
494
commit a71f3dd355f802dc362a52674a977ff81daadf9d
495
Author: Matteo Frigo <athena@fftw.org>
496
Date:   Wed Jul 5 06:33:40 2017 -0400
497

  
498
    Disable ISA_EXTENSION_PREFERS_FMA for now
499
    
500
    I still don't understand whether or not avx2 should use FMA codelets.
501
    Ryzen is faster with the non-FMA version.  Haswell prefers the FMA
502
    version.
503
    
504
    However, I suspect that Haswell prefers FMA because of a quirk of the
505
    micro-architecture.  Haswell has two floating-point "ports".  You can
506
    issue an addition only through one "port", but you can issue two FMA
507
    in parallel on both ports, so FMA appears to be faster.  Skylake
508
    apparently restores balance (but I haven't tried yet).  Suspend
509
    judgment for now until I gather more data.
510

  
511
commit f82b8c94596868897987b71a648eaa664590602a
512
Author: Matteo Frigo <athena@fftw.org>
513
Date:   Tue Jul 4 20:06:57 2017 -0400
514

  
515
    Rationalize HAVE_FMA
516
    
517
    Distinguish ARCH_PREFERS_FMA, for architectures that "naturally"
518
    prefer FMA (e.g., powerpc), from ISA_EXTENSION_PREFERS_FMA, for
519
    instruction-set extensions that favor FMA where the base architecture
520
    does not (e.g., avx2 on x86).
521
    
522
    Previously, --enable-avx2 would use FMA code for scalar and avx
523
    codelets, which is wrong.
524
    
525
    This change improves performance by a few percent on Ryzen (where FMA
526
    doesn't really do anything), and is a wash on Haswell.
527

  
528
commit 0869f4e51b8b0aeb7da1b21b2683c30cd4e10a5e
529
Author: Steven G. Johnson <stevenj@mit.edu>
530
Date:   Tue May 9 09:14:37 2017 -0400
531

  
532
    document that howmany ≥ 0 (closes #95)
533

  
534
commit 665064700b26c01c0836e4c12a5ee0eab3923858
535
Author: Tobias Klauser <tklauser@distanz.ch>
536
Date:   Wed Mar 29 16:15:45 2017 +0200
537

  
538
    Fix ARMV7-A cycle counter detection
539
    
540
    Check for the correct pre-processor define HAVE_ARMV7A_CNTVCT from
541
    config.h (instead of ARMV7A_HAS_CNTVCT) to fix the detection of the
542
    cycle counter for ARMv7-A in the configure script (and actually use it
543
    in the built library).
544
    
545
    Without this fix, even the following ./configure call:
546
    
547
      ./configure --enable-neon --enable-single --enable-armv7a-cntvct \
548
      --host=arm-linux-gnueabihf --disable-fortran \
549
      CC="arm-linux-gnueabihf-gcc -march=armv7-a"
550
    
551
    will emit the warning:
552
    
553
      checking whether a cycle counter is available... no
554
      ***************************************************************
555
      WARNING: No cycle counter found.  FFTW will use ESTIMATE mode
556
               for all plans.  See the manual for more information.
557
      ***************************************************************
558
    
559
    With this fix applied, ./configure will correctly detect the cycle
560
    counter register:
561
    
562
      ...
563
      checking whether a cycle counter is available... yes
564
      ...
565

  
566
commit cc5fc8ce7ffd77f467740554f649aab4d3f71344
567
Merge: 102f2fd0 950b1539
568
Author: Matteo Frigo <athena@fftw.org>
569
Date:   Tue Mar 14 07:21:45 2017 -0400
570

  
571
    Merge pull request #91 from fornwall/android-clock-gettime
572
    
573
    Avoid trying to use CLOCK_SGI_CYCLE on Android
574

  
575
commit 950b153910f7f0dde9cc20cddeee5dc9048d25b7
576
Author: Fredrik Fornwall <fredrik@fornwall.net>
577
Date:   Mon Mar 13 23:41:35 2017 +0100
578

  
579
    Avoid trying to use CLOCK_SGI_CYCLE on Android
580
    
581
    The Android headers define CLOCK_SGI_CYCLE but the call fails at
582
    runtime as it's not implemented. Combined with getticks() not
583
    checking the return value of clock_gettime() this causes bogus
584
    values to be returned from getticks().
585

  
586
commit 102f2fd0249dca301d195b4df1b94e7b339b8c60
587
Author: Matteo Frigo <athena@fftw.org>
588
Date:   Wed Feb 22 14:59:30 2017 -0500
589

  
590
    Compute mflops() in 64 bit precision
591
    
592
    Old code was overflowing for N>2^32
593

  
594
commit 2b63fc2eaae645a5c2ef4a97c384beb2adefd58d
595
Author: Matteo Frigo <athena@fftw.org>
596
Date:   Fri Jan 27 16:06:27 2017 -0500
597

  
598
    Update NEWS for 3.3.6-pl2
599

  
600
commit d2ca54234956ad8be82ba050305ccf979fd631a7
601
Author: Matteo Frigo <athena@fftw.org>
602
Date:   Fri Jan 27 16:01:42 2017 -0500
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff