Chris@10
|
1 (*
|
Chris@10
|
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
Chris@10
|
3 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
Chris@10
|
4 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
Chris@10
|
5 *
|
Chris@10
|
6 * This program is free software; you can redistribute it and/or modify
|
Chris@10
|
7 * it under the terms of the GNU General Public License as published by
|
Chris@10
|
8 * the Free Software Foundation; either version 2 of the License, or
|
Chris@10
|
9 * (at your option) any later version.
|
Chris@10
|
10 *
|
Chris@10
|
11 * This program is distributed in the hope that it will be useful,
|
Chris@10
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@10
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@10
|
14 * GNU General Public License for more details.
|
Chris@10
|
15 *
|
Chris@10
|
16 * You should have received a copy of the GNU General Public License
|
Chris@10
|
17 * along with this program; if not, write to the Free Software
|
Chris@10
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@10
|
19 *
|
Chris@10
|
20 *)
|
Chris@10
|
21
|
Chris@10
|
22 (* generation of trigonometric transforms *)
|
Chris@10
|
23
|
Chris@10
|
24 open Util
|
Chris@10
|
25 open Genutil
|
Chris@10
|
26 open C
|
Chris@10
|
27
|
Chris@10
|
28
|
Chris@10
|
29 let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
Chris@10
|
30
|
Chris@10
|
31 let uistride = ref Stride_variable
|
Chris@10
|
32 let uostride = ref Stride_variable
|
Chris@10
|
33 let uivstride = ref Stride_variable
|
Chris@10
|
34 let uovstride = ref Stride_variable
|
Chris@10
|
35
|
Chris@10
|
36 type mode =
|
Chris@10
|
37 | RDFT
|
Chris@10
|
38 | HDFT
|
Chris@10
|
39 | DHT
|
Chris@10
|
40 | REDFT00
|
Chris@10
|
41 | REDFT10
|
Chris@10
|
42 | REDFT01
|
Chris@10
|
43 | REDFT11
|
Chris@10
|
44 | RODFT00
|
Chris@10
|
45 | RODFT10
|
Chris@10
|
46 | RODFT01
|
Chris@10
|
47 | RODFT11
|
Chris@10
|
48 | NONE
|
Chris@10
|
49
|
Chris@10
|
50 let mode = ref NONE
|
Chris@10
|
51 let normsqr = ref 1
|
Chris@10
|
52 let unitary = ref false
|
Chris@10
|
53 let noloop = ref false
|
Chris@10
|
54
|
Chris@10
|
55 let speclist = [
|
Chris@10
|
56 "-with-istride",
|
Chris@10
|
57 Arg.String(fun x -> uistride := arg_to_stride x),
|
Chris@10
|
58 " specialize for given input stride";
|
Chris@10
|
59
|
Chris@10
|
60 "-with-ostride",
|
Chris@10
|
61 Arg.String(fun x -> uostride := arg_to_stride x),
|
Chris@10
|
62 " specialize for given output stride";
|
Chris@10
|
63
|
Chris@10
|
64 "-with-ivstride",
|
Chris@10
|
65 Arg.String(fun x -> uivstride := arg_to_stride x),
|
Chris@10
|
66 " specialize for given input vector stride";
|
Chris@10
|
67
|
Chris@10
|
68 "-with-ovstride",
|
Chris@10
|
69 Arg.String(fun x -> uovstride := arg_to_stride x),
|
Chris@10
|
70 " specialize for given output vector stride";
|
Chris@10
|
71
|
Chris@10
|
72 "-rdft",
|
Chris@10
|
73 Arg.Unit(fun () -> mode := RDFT),
|
Chris@10
|
74 " generate a real DFT codelet";
|
Chris@10
|
75
|
Chris@10
|
76 "-hdft",
|
Chris@10
|
77 Arg.Unit(fun () -> mode := HDFT),
|
Chris@10
|
78 " generate a Hermitian DFT codelet";
|
Chris@10
|
79
|
Chris@10
|
80 "-dht",
|
Chris@10
|
81 Arg.Unit(fun () -> mode := DHT),
|
Chris@10
|
82 " generate a DHT codelet";
|
Chris@10
|
83
|
Chris@10
|
84 "-redft00",
|
Chris@10
|
85 Arg.Unit(fun () -> mode := REDFT00),
|
Chris@10
|
86 " generate a DCT-I codelet";
|
Chris@10
|
87
|
Chris@10
|
88 "-redft10",
|
Chris@10
|
89 Arg.Unit(fun () -> mode := REDFT10),
|
Chris@10
|
90 " generate a DCT-II codelet";
|
Chris@10
|
91
|
Chris@10
|
92 "-redft01",
|
Chris@10
|
93 Arg.Unit(fun () -> mode := REDFT01),
|
Chris@10
|
94 " generate a DCT-III codelet";
|
Chris@10
|
95
|
Chris@10
|
96 "-redft11",
|
Chris@10
|
97 Arg.Unit(fun () -> mode := REDFT11),
|
Chris@10
|
98 " generate a DCT-IV codelet";
|
Chris@10
|
99
|
Chris@10
|
100 "-rodft00",
|
Chris@10
|
101 Arg.Unit(fun () -> mode := RODFT00),
|
Chris@10
|
102 " generate a DST-I codelet";
|
Chris@10
|
103
|
Chris@10
|
104 "-rodft10",
|
Chris@10
|
105 Arg.Unit(fun () -> mode := RODFT10),
|
Chris@10
|
106 " generate a DST-II codelet";
|
Chris@10
|
107
|
Chris@10
|
108 "-rodft01",
|
Chris@10
|
109 Arg.Unit(fun () -> mode := RODFT01),
|
Chris@10
|
110 " generate a DST-III codelet";
|
Chris@10
|
111
|
Chris@10
|
112 "-rodft11",
|
Chris@10
|
113 Arg.Unit(fun () -> mode := RODFT11),
|
Chris@10
|
114 " generate a DST-IV codelet";
|
Chris@10
|
115
|
Chris@10
|
116 "-normalization",
|
Chris@10
|
117 Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix),
|
Chris@10
|
118 " normalization integer to divide by";
|
Chris@10
|
119
|
Chris@10
|
120 "-normsqr",
|
Chris@10
|
121 Arg.String(fun x -> normsqr := int_of_string x),
|
Chris@10
|
122 " integer square of normalization to divide by";
|
Chris@10
|
123
|
Chris@10
|
124 "-unitary",
|
Chris@10
|
125 Arg.Unit(fun () -> unitary := true),
|
Chris@10
|
126 " unitary normalization (up overall scale factor)";
|
Chris@10
|
127
|
Chris@10
|
128 "-noloop",
|
Chris@10
|
129 Arg.Unit(fun () -> noloop := true),
|
Chris@10
|
130 " no vector loop";
|
Chris@10
|
131 ]
|
Chris@10
|
132
|
Chris@10
|
133 let sqrt_half = Complex.inverse_int_sqrt 2
|
Chris@10
|
134 let sqrt_two = Complex.int_sqrt 2
|
Chris@10
|
135
|
Chris@10
|
136 let rescale sc s1 s2 input i =
|
Chris@10
|
137 if ((i == s1 || i == s2) && !unitary) then
|
Chris@10
|
138 Complex.times (input i) sc
|
Chris@10
|
139 else
|
Chris@10
|
140 input i
|
Chris@10
|
141
|
Chris@10
|
142 let generate n mode =
|
Chris@10
|
143 let iarray = "I"
|
Chris@10
|
144 and oarray = "O"
|
Chris@10
|
145 and istride = "is"
|
Chris@10
|
146 and ostride = "os"
|
Chris@10
|
147 and i = "i"
|
Chris@10
|
148 and v = "v"
|
Chris@10
|
149 in
|
Chris@10
|
150
|
Chris@10
|
151 let sign = !Genutil.sign
|
Chris@10
|
152 and name = !Magic.codelet_name in
|
Chris@10
|
153
|
Chris@10
|
154 let vistride = either_stride (!uistride) (C.SVar istride)
|
Chris@10
|
155 and vostride = either_stride (!uostride) (C.SVar ostride)
|
Chris@10
|
156 in
|
Chris@10
|
157
|
Chris@10
|
158 let sovs = stride_to_string "ovs" !uovstride in
|
Chris@10
|
159 let sivs = stride_to_string "ivs" !uivstride in
|
Chris@10
|
160
|
Chris@10
|
161 let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with
|
Chris@10
|
162 | RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1
|
Chris@10
|
163 | HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *)
|
Chris@10
|
164 | DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1
|
Chris@10
|
165 | REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1
|
Chris@10
|
166 | REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1
|
Chris@10
|
167 | REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1
|
Chris@10
|
168 | REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1
|
Chris@10
|
169 | RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1
|
Chris@10
|
170 | RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1
|
Chris@10
|
171 | RODFT01 -> Trig.dstIII, load_array_r, store_array_r, n-1,-1,-1,-1
|
Chris@10
|
172 | RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1
|
Chris@10
|
173 | _ -> failwith "must specify transform kind"
|
Chris@10
|
174 in
|
Chris@10
|
175
|
Chris@10
|
176 let locations = unique_array_c n in
|
Chris@10
|
177 let input = locative_array_c n
|
Chris@10
|
178 (C.array_subscript iarray vistride)
|
Chris@10
|
179 (C.array_subscript "BUG" vistride)
|
Chris@10
|
180 locations sivs in
|
Chris@10
|
181 let output = rescale sqrt_half so1 so2
|
Chris@10
|
182 ((Complex.times (Complex.inverse_int_sqrt !normsqr))
|
Chris@10
|
183 @@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in
|
Chris@10
|
184 let oloc =
|
Chris@10
|
185 locative_array_c n
|
Chris@10
|
186 (C.array_subscript oarray vostride)
|
Chris@10
|
187 (C.array_subscript "BUG" vostride)
|
Chris@10
|
188 locations sovs in
|
Chris@10
|
189 let odag = store_output n oloc output in
|
Chris@10
|
190 let annot = standard_optimizer odag in
|
Chris@10
|
191
|
Chris@10
|
192 let body = if !noloop then Block([], [Asch annot]) else Block (
|
Chris@10
|
193 [Decl ("INT", i)],
|
Chris@10
|
194 [For (Expr_assign (CVar i, CVar v),
|
Chris@10
|
195 Binop (" > ", CVar i, Integer 0),
|
Chris@10
|
196 list_to_comma
|
Chris@10
|
197 [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
|
Chris@10
|
198 Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]);
|
Chris@10
|
199 Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]);
|
Chris@10
|
200 make_volatile_stride (2*n) (CVar istride);
|
Chris@10
|
201 make_volatile_stride (2*n) (CVar ostride)
|
Chris@10
|
202 ],
|
Chris@10
|
203 Asch annot)
|
Chris@10
|
204 ])
|
Chris@10
|
205 in
|
Chris@10
|
206
|
Chris@10
|
207 let tree =
|
Chris@10
|
208 Fcn ((if !Magic.standalone then "void" else "static void"), name,
|
Chris@10
|
209 ([Decl (C.constrealtypep, iarray);
|
Chris@10
|
210 Decl (C.realtypep, oarray)]
|
Chris@10
|
211 @ (if stride_fixed !uistride then []
|
Chris@10
|
212 else [Decl (C.stridetype, istride)])
|
Chris@10
|
213 @ (if stride_fixed !uostride then []
|
Chris@10
|
214 else [Decl (C.stridetype, ostride)])
|
Chris@10
|
215 @ (if !noloop then [] else
|
Chris@10
|
216 [Decl ("INT", v)]
|
Chris@10
|
217 @ (if stride_fixed !uivstride then []
|
Chris@10
|
218 else [Decl ("INT", "ivs")])
|
Chris@10
|
219 @ (if stride_fixed !uovstride then []
|
Chris@10
|
220 else [Decl ("INT", "ovs")]))),
|
Chris@10
|
221 finalize_fcn body)
|
Chris@10
|
222
|
Chris@10
|
223 in let desc =
|
Chris@10
|
224 Printf.sprintf
|
Chris@10
|
225 "static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n"
|
Chris@10
|
226 n name (flops_of tree)
|
Chris@10
|
227 (match mode with
|
Chris@10
|
228 | RDFT -> "RDFT00"
|
Chris@10
|
229 | HDFT -> "HDFT00"
|
Chris@10
|
230 | DHT -> "DHT"
|
Chris@10
|
231 | REDFT00 -> "REDFT00"
|
Chris@10
|
232 | REDFT10 -> "REDFT10"
|
Chris@10
|
233 | REDFT01 -> "REDFT01"
|
Chris@10
|
234 | REDFT11 -> "REDFT11"
|
Chris@10
|
235 | RODFT00 -> "RODFT00"
|
Chris@10
|
236 | RODFT10 -> "RODFT10"
|
Chris@10
|
237 | RODFT01 -> "RODFT01"
|
Chris@10
|
238 | RODFT11 -> "RODFT11"
|
Chris@10
|
239 | _ -> failwith "must specify a transform kind")
|
Chris@10
|
240
|
Chris@10
|
241 and init =
|
Chris@10
|
242 (declare_register_fcn name) ^
|
Chris@10
|
243 "{" ^
|
Chris@10
|
244 " X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^
|
Chris@10
|
245 "}\n"
|
Chris@10
|
246
|
Chris@10
|
247 in
|
Chris@10
|
248 (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
|
Chris@10
|
249
|
Chris@10
|
250
|
Chris@10
|
251 let main () =
|
Chris@10
|
252 begin
|
Chris@10
|
253 parse speclist usage;
|
Chris@10
|
254 print_string (generate (check_size ()) !mode);
|
Chris@10
|
255 end
|
Chris@10
|
256
|
Chris@10
|
257 let _ = main()
|