comparison src/fftw-3.3.8/genfft/gen_r2cf.ml @ 167:bd3cc4d1df30

Add FFTW 3.3.8 source, and a Linux build
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 19 Nov 2019 14:52:55 +0000
parents
children
comparison
equal deleted inserted replaced
166:cbd6d7e562c7 167:bd3cc4d1df30
1 (*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
3 * Copyright (c) 2003, 2007-14 Matteo Frigo
4 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 *)
21
22 open Util
23 open Genutil
24 open C
25
26
27 let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
28
29 let urs = ref Stride_variable
30 let ucsr = ref Stride_variable
31 let ucsi = ref Stride_variable
32 let uivs = ref Stride_variable
33 let uovs = ref Stride_variable
34 let dftII_flag = ref false
35
36 let speclist = [
37 "-with-rs",
38 Arg.String(fun x -> urs := arg_to_stride x),
39 " specialize for given real-array stride";
40
41 "-with-csr",
42 Arg.String(fun x -> ucsr := arg_to_stride x),
43 " specialize for given complex-array real stride";
44
45 "-with-csi",
46 Arg.String(fun x -> ucsi := arg_to_stride x),
47 " specialize for given complex-array imaginary stride";
48
49 "-with-ivs",
50 Arg.String(fun x -> uivs := arg_to_stride x),
51 " specialize for given input vector stride";
52
53 "-with-ovs",
54 Arg.String(fun x -> uovs := arg_to_stride x),
55 " specialize for given output vector stride";
56
57 "-dft-II",
58 Arg.Unit(fun () -> dftII_flag := true),
59 " produce shifted dftII-style codelets"
60 ]
61
62 let rdftII sign n input =
63 let input' i = if i < n then input i else Complex.zero in
64 let f = Fft.dft sign (2 * n) input' in
65 let g i = f (2 * i + 1)
66 in fun i ->
67 if (i < n - i) then g i
68 else if (2 * i + 1 == n) then Complex.real (g i)
69 else Complex.zero
70
71 let generate n =
72 let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci"
73 and rs = "rs" and csr = "csr" and csi = "csi"
74 and i = "i" and v = "v"
75 and transform = if !dftII_flag then rdftII else Trig.rdft
76 in
77
78 let sign = !Genutil.sign
79 and name = !Magic.codelet_name in
80
81 let vrs = either_stride (!urs) (C.SVar rs)
82 and vcsr = either_stride (!ucsr) (C.SVar csr)
83 and vcsi = either_stride (!ucsi) (C.SVar csi)
84 in
85
86 let sovs = stride_to_string "ovs" !uovs in
87 let sivs = stride_to_string "ivs" !uivs in
88
89 let locations = unique_array_c n in
90 let inpute =
91 locative_array_c n
92 (C.array_subscript ar0 vrs)
93 (C.array_subscript "BUG" vrs)
94 locations sivs
95 and inputo =
96 locative_array_c n
97 (C.array_subscript ar1 vrs)
98 (C.array_subscript "BUG" vrs)
99 locations sivs
100 in
101 let input i = if i mod 2 == 0 then inpute (i/2) else inputo ((i-1)/2) in
102 let output = transform sign n (load_array_r n input) in
103 let oloc =
104 locative_array_c n
105 (C.array_subscript acr vcsr)
106 (C.array_subscript aci vcsi)
107 locations sovs in
108 let odag = store_array_hc n oloc output in
109 let annot = standard_optimizer odag in
110
111 let body = Block (
112 [Decl ("INT", i)],
113 [For (Expr_assign (CVar i, CVar v),
114 Binop (" > ", CVar i, Integer 0),
115 list_to_comma
116 [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
117 Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sivs]);
118 Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sivs]);
119 Expr_assign (CVar acr, CPlus [CVar acr; CVar sovs]);
120 Expr_assign (CVar aci, CPlus [CVar aci; CVar sovs]);
121 make_volatile_stride (4*n) (CVar rs);
122 make_volatile_stride (4*n) (CVar csr);
123 make_volatile_stride (4*n) (CVar csi)
124 ],
125 Asch annot)
126 ])
127 in
128
129 let tree =
130 Fcn ((if !Magic.standalone then "void" else "static void"), name,
131 ([Decl (C.realtypep, ar0);
132 Decl (C.realtypep, ar1);
133 Decl (C.realtypep, acr);
134 Decl (C.realtypep, aci);
135 Decl (C.stridetype, rs);
136 Decl (C.stridetype, csr);
137 Decl (C.stridetype, csi);
138 Decl ("INT", v);
139 Decl ("INT", "ivs");
140 Decl ("INT", "ovs")]),
141 finalize_fcn body)
142
143 in let desc =
144 Printf.sprintf
145 "static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n"
146 n name (flops_of tree)
147
148 and init =
149 (declare_register_fcn name) ^
150 "{" ^
151 " X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^
152 "}\n"
153
154 in
155 (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
156
157
158 let main () =
159 begin
160 parse speclist usage;
161 print_string (generate (check_size ()));
162 end
163
164 let _ = main()