Chris@10
|
1 (*
|
Chris@10
|
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
Chris@10
|
3 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
Chris@10
|
4 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
Chris@10
|
5 *
|
Chris@10
|
6 * This program is free software; you can redistribute it and/or modify
|
Chris@10
|
7 * it under the terms of the GNU General Public License as published by
|
Chris@10
|
8 * the Free Software Foundation; either version 2 of the License, or
|
Chris@10
|
9 * (at your option) any later version.
|
Chris@10
|
10 *
|
Chris@10
|
11 * This program is distributed in the hope that it will be useful,
|
Chris@10
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@10
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@10
|
14 * GNU General Public License for more details.
|
Chris@10
|
15 *
|
Chris@10
|
16 * You should have received a copy of the GNU General Public License
|
Chris@10
|
17 * along with this program; if not, write to the Free Software
|
Chris@10
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@10
|
19 *
|
Chris@10
|
20 *)
|
Chris@10
|
21
|
Chris@10
|
22 (* utilities common to all generators *)
|
Chris@10
|
23 open Util
|
Chris@10
|
24
|
Chris@10
|
25 let choose_simd a b = if !Simdmagic.simd_mode then b else a
|
Chris@10
|
26
|
Chris@10
|
27 let unique_array n = array n (fun _ -> Unique.make ())
|
Chris@10
|
28 let unique_array_c n =
|
Chris@10
|
29 array n (fun _ ->
|
Chris@10
|
30 (Unique.make (), Unique.make ()))
|
Chris@10
|
31
|
Chris@10
|
32 let unique_v_array_c veclen n =
|
Chris@10
|
33 array veclen (fun _ ->
|
Chris@10
|
34 unique_array_c n)
|
Chris@10
|
35
|
Chris@10
|
36 let locative_array_c n rarr iarr loc vs =
|
Chris@10
|
37 array n (fun i ->
|
Chris@10
|
38 let klass = Unique.make () in
|
Chris@10
|
39 let (rloc, iloc) = loc i in
|
Chris@10
|
40 (Variable.make_locative rloc klass rarr i vs,
|
Chris@10
|
41 Variable.make_locative iloc klass iarr i vs))
|
Chris@10
|
42
|
Chris@10
|
43 let locative_v_array_c veclen n rarr iarr loc vs =
|
Chris@10
|
44 array veclen (fun v ->
|
Chris@10
|
45 array n (fun i ->
|
Chris@10
|
46 let klass = Unique.make () in
|
Chris@10
|
47 let (rloc, iloc) = loc v i in
|
Chris@10
|
48 (Variable.make_locative rloc klass (rarr v) i vs,
|
Chris@10
|
49 Variable.make_locative iloc klass (iarr v) i vs)))
|
Chris@10
|
50
|
Chris@10
|
51 let temporary_array n =
|
Chris@10
|
52 array n (fun i -> Variable.make_temporary ())
|
Chris@10
|
53
|
Chris@10
|
54 let temporary_array_c n =
|
Chris@10
|
55 let tmpr = temporary_array n
|
Chris@10
|
56 and tmpi = temporary_array n
|
Chris@10
|
57 in
|
Chris@10
|
58 array n (fun i -> (tmpr i, tmpi i))
|
Chris@10
|
59
|
Chris@10
|
60 let temporary_v_array_c veclen n =
|
Chris@10
|
61 array veclen (fun v -> temporary_array_c n)
|
Chris@10
|
62
|
Chris@10
|
63 let temporary_array_c n =
|
Chris@10
|
64 let tmpr = temporary_array n
|
Chris@10
|
65 and tmpi = temporary_array n
|
Chris@10
|
66 in
|
Chris@10
|
67 array n (fun i -> (tmpr i, tmpi i))
|
Chris@10
|
68
|
Chris@10
|
69 let load_c (vr, vi) = Complex.make (Expr.Load vr, Expr.Load vi)
|
Chris@10
|
70 let load_r (vr, vi) = Complex.make (Expr.Load vr, Expr.Num (Number.zero))
|
Chris@10
|
71
|
Chris@10
|
72 let twiddle_array nt w =
|
Chris@10
|
73 array (nt/2) (fun i ->
|
Chris@10
|
74 let stride = choose_simd (C.SInteger 1) (C.SConst "TWVL")
|
Chris@10
|
75 and klass = Unique.make () in
|
Chris@10
|
76 let (refr, refi) = (C.array_subscript w stride (2 * i),
|
Chris@10
|
77 C.array_subscript w stride (2 * i + 1))
|
Chris@10
|
78 in
|
Chris@10
|
79 let (kr, ki) = (Variable.make_constant klass refr,
|
Chris@10
|
80 Variable.make_constant klass refi)
|
Chris@10
|
81 in
|
Chris@10
|
82 load_c (kr, ki))
|
Chris@10
|
83
|
Chris@10
|
84
|
Chris@10
|
85 let load_array_c n var = array n (fun i -> load_c (var i))
|
Chris@10
|
86 let load_array_r n var = array n (fun i -> load_r (var i))
|
Chris@10
|
87 let load_array_hc n var =
|
Chris@10
|
88 array n (fun i ->
|
Chris@10
|
89 if (i < n - i) then
|
Chris@10
|
90 load_c (var i)
|
Chris@10
|
91 else if (i > n - i) then
|
Chris@10
|
92 Complex.times Complex.i (load_c (var (n - i)))
|
Chris@10
|
93 else
|
Chris@10
|
94 load_r (var i))
|
Chris@10
|
95
|
Chris@10
|
96 let load_v_array_c veclen n var =
|
Chris@10
|
97 array veclen (fun v -> load_array_c n (var v))
|
Chris@10
|
98
|
Chris@10
|
99 let store_c (vr, vi) x = [Complex.store_real vr x; Complex.store_imag vi x]
|
Chris@10
|
100 let store_r (vr, vi) x = Complex.store_real vr x
|
Chris@10
|
101 let store_i (vr, vi) x = Complex.store_imag vi x
|
Chris@10
|
102
|
Chris@10
|
103 let assign_array_c n dst src =
|
Chris@10
|
104 List.flatten
|
Chris@10
|
105 (rmap (iota n)
|
Chris@10
|
106 (fun i ->
|
Chris@10
|
107 let (ar, ai) = Complex.assign (dst i) (src i)
|
Chris@10
|
108 in [ar; ai]))
|
Chris@10
|
109 let assign_v_array_c veclen n dst src =
|
Chris@10
|
110 List.flatten
|
Chris@10
|
111 (rmap (iota veclen)
|
Chris@10
|
112 (fun v ->
|
Chris@10
|
113 assign_array_c n (dst v) (src v)))
|
Chris@10
|
114
|
Chris@10
|
115 let vassign_v_array_c veclen n dst src =
|
Chris@10
|
116 List.flatten
|
Chris@10
|
117 (rmap (iota n) (fun i ->
|
Chris@10
|
118 List.flatten
|
Chris@10
|
119 (rmap (iota veclen)
|
Chris@10
|
120 (fun v ->
|
Chris@10
|
121 let (ar, ai) = Complex.assign (dst v i) (src v i)
|
Chris@10
|
122 in [ar; ai]))))
|
Chris@10
|
123
|
Chris@10
|
124 let store_array_r n dst src =
|
Chris@10
|
125 rmap (iota n)
|
Chris@10
|
126 (fun i -> store_r (dst i) (src i))
|
Chris@10
|
127
|
Chris@10
|
128 let store_array_c n dst src =
|
Chris@10
|
129 List.flatten
|
Chris@10
|
130 (rmap (iota n)
|
Chris@10
|
131 (fun i -> store_c (dst i) (src i)))
|
Chris@10
|
132
|
Chris@10
|
133 let store_array_hc n dst src =
|
Chris@10
|
134 List.flatten
|
Chris@10
|
135 (rmap (iota n)
|
Chris@10
|
136 (fun i ->
|
Chris@10
|
137 if (i < n - i) then
|
Chris@10
|
138 store_c (dst i) (src i)
|
Chris@10
|
139 else if (i > n - i) then
|
Chris@10
|
140 []
|
Chris@10
|
141 else
|
Chris@10
|
142 [store_r (dst i) (Complex.real (src i))]))
|
Chris@10
|
143
|
Chris@10
|
144
|
Chris@10
|
145 let store_v_array_c veclen n dst src =
|
Chris@10
|
146 List.flatten
|
Chris@10
|
147 (rmap (iota veclen)
|
Chris@10
|
148 (fun v ->
|
Chris@10
|
149 store_array_c n (dst v) (src v)))
|
Chris@10
|
150
|
Chris@10
|
151
|
Chris@10
|
152 let elementwise f n a = array n (fun i -> f (a i))
|
Chris@10
|
153 let conj_array_c = elementwise Complex.conj
|
Chris@10
|
154 let real_array_c = elementwise Complex.real
|
Chris@10
|
155 let imag_array_c = elementwise Complex.imag
|
Chris@10
|
156
|
Chris@10
|
157 let elementwise_v f veclen n a =
|
Chris@10
|
158 array veclen (fun v ->
|
Chris@10
|
159 array n (fun i -> f (a v i)))
|
Chris@10
|
160 let conj_v_array_c = elementwise_v Complex.conj
|
Chris@10
|
161 let real_v_array_c = elementwise_v Complex.real
|
Chris@10
|
162 let imag_v_array_c = elementwise_v Complex.imag
|
Chris@10
|
163
|
Chris@10
|
164
|
Chris@10
|
165 let transpose f i j = f j i
|
Chris@10
|
166 let symmetrize f i j = if i <= j then f i j else f j i
|
Chris@10
|
167
|
Chris@10
|
168 (* utilities for command-line parsing *)
|
Chris@10
|
169 let standard_arg_parse_fail _ = failwith "too many arguments"
|
Chris@10
|
170
|
Chris@10
|
171 let dump_dag alist =
|
Chris@10
|
172 let fnam = !Magic.dag_dump_file in
|
Chris@10
|
173 if (String.length fnam > 0) then
|
Chris@10
|
174 let ochan = open_out fnam in
|
Chris@10
|
175 begin
|
Chris@10
|
176 To_alist.dump (output_string ochan) alist;
|
Chris@10
|
177 close_out ochan;
|
Chris@10
|
178 end
|
Chris@10
|
179
|
Chris@10
|
180 let dump_alist alist =
|
Chris@10
|
181 let fnam = !Magic.alist_dump_file in
|
Chris@10
|
182 if (String.length fnam > 0) then
|
Chris@10
|
183 let ochan = open_out fnam in
|
Chris@10
|
184 begin
|
Chris@10
|
185 Expr.dump (output_string ochan) alist;
|
Chris@10
|
186 close_out ochan;
|
Chris@10
|
187 end
|
Chris@10
|
188
|
Chris@10
|
189 let dump_asched asched =
|
Chris@10
|
190 let fnam = !Magic.asched_dump_file in
|
Chris@10
|
191 if (String.length fnam > 0) then
|
Chris@10
|
192 let ochan = open_out fnam in
|
Chris@10
|
193 begin
|
Chris@10
|
194 Annotate.dump (output_string ochan) asched;
|
Chris@10
|
195 close_out ochan;
|
Chris@10
|
196 end
|
Chris@10
|
197
|
Chris@10
|
198 (* utilities for optimization *)
|
Chris@10
|
199 let standard_scheduler dag =
|
Chris@10
|
200 let optim = Algsimp.algsimp dag in
|
Chris@10
|
201 let alist = To_alist.to_assignments optim in
|
Chris@10
|
202 let _ = dump_alist alist in
|
Chris@10
|
203 let _ = dump_dag alist in
|
Chris@10
|
204 if !Magic.precompute_twiddles then
|
Chris@10
|
205 Schedule.isolate_precomputations_and_schedule alist
|
Chris@10
|
206 else
|
Chris@10
|
207 Schedule.schedule alist
|
Chris@10
|
208
|
Chris@10
|
209 let standard_optimizer dag =
|
Chris@10
|
210 let sched = standard_scheduler dag in
|
Chris@10
|
211 let annot = Annotate.annotate [] sched in
|
Chris@10
|
212 let _ = dump_asched annot in
|
Chris@10
|
213 annot
|
Chris@10
|
214
|
Chris@10
|
215 let size = ref None
|
Chris@10
|
216 let sign = ref (-1)
|
Chris@10
|
217
|
Chris@10
|
218 let speclist = [
|
Chris@10
|
219 "-n", Arg.Int(fun i -> size := Some i), " generate a codelet of size <n>";
|
Chris@10
|
220 "-sign",
|
Chris@10
|
221 Arg.Int(fun i ->
|
Chris@10
|
222 if (i > 0) then
|
Chris@10
|
223 sign := 1
|
Chris@10
|
224 else
|
Chris@10
|
225 sign := (-1)),
|
Chris@10
|
226 " sign of transform";
|
Chris@10
|
227 ]
|
Chris@10
|
228
|
Chris@10
|
229 let check_size () =
|
Chris@10
|
230 match !size with
|
Chris@10
|
231 | Some i -> i
|
Chris@10
|
232 | None -> failwith "must specify -n"
|
Chris@10
|
233
|
Chris@10
|
234 let expand_name name = if name = "" then "noname" else name
|
Chris@10
|
235
|
Chris@10
|
236 let declare_register_fcn name =
|
Chris@10
|
237 if name = "" then
|
Chris@10
|
238 "void NAME(planner *p)\n"
|
Chris@10
|
239 else
|
Chris@10
|
240 "void " ^ (choose_simd "X" "XSIMD") ^
|
Chris@10
|
241 "(codelet_" ^ name ^ ")(planner *p)\n"
|
Chris@10
|
242
|
Chris@10
|
243 let stringify name =
|
Chris@10
|
244 if name = "" then "STRINGIZE(NAME)" else
|
Chris@10
|
245 choose_simd ("\"" ^ name ^ "\"")
|
Chris@10
|
246 ("XSIMD_STRING(\"" ^ name ^ "\")")
|
Chris@10
|
247
|
Chris@10
|
248 let parse user_speclist usage =
|
Chris@10
|
249 Arg.parse
|
Chris@10
|
250 (user_speclist @ speclist @ Magic.speclist @ Simdmagic.speclist)
|
Chris@10
|
251 standard_arg_parse_fail
|
Chris@10
|
252 usage
|
Chris@10
|
253
|
Chris@10
|
254 let rec list_to_c = function
|
Chris@10
|
255 [] -> ""
|
Chris@10
|
256 | [a] -> (string_of_int a)
|
Chris@10
|
257 | a :: b -> (string_of_int a) ^ ", " ^ (list_to_c b)
|
Chris@10
|
258
|
Chris@10
|
259 let rec list_to_comma = function
|
Chris@10
|
260 | [a; b] -> C.Comma (a, b)
|
Chris@10
|
261 | a :: b -> C.Comma (a, list_to_comma b)
|
Chris@10
|
262 | _ -> failwith "list_to_comma"
|
Chris@10
|
263
|
Chris@10
|
264
|
Chris@10
|
265 type stride = Stride_variable | Fixed_int of int | Fixed_string of string
|
Chris@10
|
266
|
Chris@10
|
267 let either_stride a b =
|
Chris@10
|
268 match a with
|
Chris@10
|
269 Fixed_int x -> C.SInteger x
|
Chris@10
|
270 | Fixed_string x -> C.SConst x
|
Chris@10
|
271 | _ -> b
|
Chris@10
|
272
|
Chris@10
|
273 let stride_fixed = function
|
Chris@10
|
274 Stride_variable -> false
|
Chris@10
|
275 | _ -> true
|
Chris@10
|
276
|
Chris@10
|
277 let arg_to_stride s =
|
Chris@10
|
278 try
|
Chris@10
|
279 Fixed_int (int_of_string s)
|
Chris@10
|
280 with Failure "int_of_string" ->
|
Chris@10
|
281 Fixed_string s
|
Chris@10
|
282
|
Chris@10
|
283 let stride_to_solverparm = function
|
Chris@10
|
284 Stride_variable -> "0"
|
Chris@10
|
285 | Fixed_int x -> string_of_int x
|
Chris@10
|
286 | Fixed_string x -> x
|
Chris@10
|
287
|
Chris@10
|
288 let stride_to_string s = function
|
Chris@10
|
289 Stride_variable -> s
|
Chris@10
|
290 | Fixed_int x -> string_of_int x
|
Chris@10
|
291 | Fixed_string x -> x
|
Chris@10
|
292
|
Chris@10
|
293 (* output the command line *)
|
Chris@10
|
294 let cmdline () =
|
Chris@10
|
295 List.fold_right (fun a b -> a ^ " " ^ b) (Array.to_list Sys.argv) ""
|
Chris@10
|
296
|
Chris@10
|
297 let unparse tree =
|
Chris@10
|
298 "/* Generated by: " ^ (cmdline ()) ^ "*/\n\n" ^
|
Chris@10
|
299 (C.print_cost tree) ^
|
Chris@10
|
300 (if String.length !Magic.inklude > 0
|
Chris@10
|
301 then
|
Chris@10
|
302 (Printf.sprintf "#include \"%s\"\n\n" !Magic.inklude)
|
Chris@10
|
303 else "") ^
|
Chris@10
|
304 (if !Simdmagic.simd_mode then
|
Chris@10
|
305 Simd.unparse_function tree
|
Chris@10
|
306 else
|
Chris@10
|
307 C.unparse_function tree)
|
Chris@10
|
308
|
Chris@10
|
309 let finalize_fcn ast =
|
Chris@10
|
310 let mergedecls = function
|
Chris@10
|
311 C.Block (d1, [C.Block (d2, s)]) -> C.Block (d1 @ d2, s)
|
Chris@10
|
312 | x -> x
|
Chris@10
|
313 and extract_constants =
|
Chris@10
|
314 if !Simdmagic.simd_mode then
|
Chris@10
|
315 Simd.extract_constants
|
Chris@10
|
316 else
|
Chris@10
|
317 C.extract_constants
|
Chris@10
|
318
|
Chris@10
|
319 in mergedecls (C.Block (extract_constants ast, [ast; C.Simd_leavefun]))
|
Chris@10
|
320
|
Chris@10
|
321 let twinstr_to_string vl x =
|
Chris@10
|
322 if !Simdmagic.simd_mode then
|
Chris@10
|
323 Twiddle.twinstr_to_simd_string vl x
|
Chris@10
|
324 else
|
Chris@10
|
325 Twiddle.twinstr_to_c_string x
|
Chris@10
|
326
|
Chris@10
|
327 let make_volatile_stride n x =
|
Chris@10
|
328 C.CCall ("MAKE_VOLATILE_STRIDE", C.Comma((C.Integer n), x))
|