annotate src/fftw-3.3.3/dft/simd/codlist.mk @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 # This file contains a standard list of DFT SIMD codelets. It is
Chris@10 2 # included by common/Makefile to generate the C files with the actual
Chris@10 3 # codelets in them. It is included by {sse,sse2,...}/Makefile to
Chris@10 4 # generate and compile stub files that include common/*.c
Chris@10 5
Chris@10 6 # You can customize FFTW for special needs, e.g. to handle certain
Chris@10 7 # sizes more efficiently, by adding new codelets to the lists of those
Chris@10 8 # included by default. If you change the list of codelets, any new
Chris@10 9 # ones you added will be automatically generated when you run the
Chris@10 10 # bootstrap script (see "Generating your own code" in the FFTW
Chris@10 11 # manual).
Chris@10 12
Chris@10 13 ###########################################################################
Chris@10 14 # N1F: n1fv_<n> is a hard-coded FFTW_FORWARD FFT of size <n>, using SIMD (n = 2..16, 20, 25, 32, 64, 128)
Chris@10 15 N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \
Chris@10 16 n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \
Chris@10 17 n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c
Chris@10 18
Chris@10 19 # N2F: as n1fv_<n> above, with restricted input vector stride (n2fv_<n>; even n = 2..16, plus 20, 32, 64)
Chris@10 20 N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \
Chris@10 21 n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c
Chris@10 22
Chris@10 23 # as above, but FFTW_BACKWARD: N1B (n1bv_<n>) mirrors N1F and N2B (n2bv_<n>) mirrors N2F, size for size
Chris@10 24 N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \
Chris@10 25 n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \
Chris@10 26 n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c
Chris@10 27
Chris@10 28 N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \
Chris@10 29 n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c
Chris@10 30
Chris@10 31 # N2S: split-complex codelets (n2sv_<n>; n = 4, 8, 16, 32, 64)
Chris@10 32 N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c
Chris@10 33
Chris@10 34 ###########################################################################
Chris@10 35 # T1F: t1fv_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step
Chris@10 36 # for an FFTW_FORWARD transform, using SIMD (r = 2..10, 12, 15, 16, 20, 25, 32, 64)
Chris@10 37 T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \
Chris@10 38 t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \
Chris@10 39 t1fv_20.c t1fv_25.c
Chris@10 40
Chris@10 41 # T2F: same as t1fv_*, but with a different twiddle storage scheme (T3F and T1FU are presumably further t1fv_* variants -- TODO confirm against the FFTW manual)
Chris@10 42 T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \
Chris@10 43 t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c
Chris@10 44 T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \
Chris@10 45 t3fv_20.c t3fv_25.c
Chris@10 46 T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \
Chris@10 47 t1fuv_8.c t1fuv_9.c t1fuv_10.c
Chris@10 48
Chris@10 49 # T1B: as T1F above, but FFTW_BACKWARD (t1bv_<r>, same radices as T1F)
Chris@10 50 T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \
Chris@10 51 t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \
Chris@10 52 t1bv_20.c t1bv_25.c
Chris@10 53
Chris@10 54 # T2B: same as t1bv_*, but with a different twiddle storage scheme (T2B, T3B and T1BU carry the same sizes as T2F, T3F and T1FU)
Chris@10 55 T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \
Chris@10 56 t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c
Chris@10 57 T3B = t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \
Chris@10 58 t3bv_20.c t3bv_25.c
Chris@10 59 T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \
Chris@10 60 t1buv_8.c t1buv_9.c t1buv_10.c
Chris@10 61
Chris@10 62 # T1S, T2S: split-complex codelets (t1sv_<r>, r = 2, 4, 8, 16, 32; t2sv_<r>, r = 4, 8, 16, 32)
Chris@10 63 T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c
Chris@10 64 T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c
Chris@10 65
Chris@10 66 ###########################################################################
Chris@10 67 # Q1F: q1fv_<r> is <r> twiddle FFTW_FORWARD FFTs of size <r> (DIF step),
Chris@10 68 # where the output is transposed, using SIMD. This is used for
Chris@10 69 # in-place transposes in sizes that are divisible by <r>^2. These
Chris@10 70 # codelets have size ~ <r>^2, so you should probably not use <r>
Chris@10 71 # bigger than 8 or so (the default list below stops at 8).
Chris@10 72 Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c
Chris@10 73
Chris@10 74 # Q1B: as Q1F above, but FFTW_BACKWARD (q1bv_<r>, same sizes as Q1F)
Chris@10 75 Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c
Chris@10 76
Chris@10 77 ########## SIMD_CODELETS: all seventeen codelet lists defined above, concatenated ##########
Chris@10 78 SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \
Chris@10 79 $(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B)