Chris@10
|
1 (*
|
Chris@10
|
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
Chris@10
|
3 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
Chris@10
|
4 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
Chris@10
|
5 *
|
Chris@10
|
6 * This program is free software; you can redistribute it and/or modify
|
Chris@10
|
7 * it under the terms of the GNU General Public License as published by
|
Chris@10
|
8 * the Free Software Foundation; either version 2 of the License, or
|
Chris@10
|
9 * (at your option) any later version.
|
Chris@10
|
10 *
|
Chris@10
|
11 * This program is distributed in the hope that it will be useful,
|
Chris@10
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@10
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@10
|
14 * GNU General Public License for more details.
|
Chris@10
|
15 *
|
Chris@10
|
16 * You should have received a copy of the GNU General Public License
|
Chris@10
|
17 * along with this program; if not, write to the Free Software
|
Chris@10
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@10
|
19 *
|
Chris@10
|
20 *)
|
Chris@10
|
21
|
Chris@10
|
22 (*************************************************************
|
Chris@10
|
23 * Conversion of the dag to an assignment list
|
Chris@10
|
24 *************************************************************)
|
Chris@10
|
25 (*
|
Chris@10
|
26 * This function is messy. The main problem is that we want to
|
Chris@10
|
27 * inline dag nodes conditionally, depending on how many times they
|
Chris@10
|
28 * are used. The Right Thing to do would be to modify the
|
Chris@10
|
29 * state monad to propagate some of the state backwards, so that
|
Chris@10
|
30 * we know whether a given node will be used again in the future.
|
Chris@10
|
31 * This modification is trivial in a lazy language, but it is
|
Chris@10
|
32 * messy in a strict language like ML.
|
Chris@10
|
33 *
|
Chris@10
|
34 * In this implementation, we just do the obvious thing, i.e., visit
|
Chris@10
|
35 * the dag twice, the first to count the node usages, and the second to
|
Chris@10
|
36 * produce the output.
|
Chris@10
|
37 *)
|
Chris@10
|
38
|
Chris@10
|
39 open Monads.StateMonad
|
Chris@10
|
40 open Monads.MemoMonad
|
Chris@10
|
41 open Expr
|
Chris@10
|
42
|
Chris@10
|
43 let fresh = Variable.make_temporary
|
Chris@10
|
44 let node_insert x = Assoctable.insert Expr.hash x
|
Chris@10
|
45 let node_lookup x = Assoctable.lookup Expr.hash (==) x
|
Chris@10
|
46 let empty = Assoctable.empty
|
Chris@10
|
47
|
Chris@10
|
48 let fetchAl =
|
Chris@10
|
49 fetchState >>= (fun (al, _, _) -> returnM al)
|
Chris@10
|
50
|
Chris@10
|
51 let storeAl al =
|
Chris@10
|
52 fetchState >>= (fun (_, visited, visited') ->
|
Chris@10
|
53 storeState (al, visited, visited'))
|
Chris@10
|
54
|
Chris@10
|
55 let fetchVisited = fetchState >>= (fun (_, v, _) -> returnM v)
|
Chris@10
|
56
|
Chris@10
|
57 let storeVisited visited =
|
Chris@10
|
58 fetchState >>= (fun (al, _, visited') ->
|
Chris@10
|
59 storeState (al, visited, visited'))
|
Chris@10
|
60
|
Chris@10
|
61 let fetchVisited' = fetchState >>= (fun (_, _, v') -> returnM v')
|
Chris@10
|
62 let storeVisited' visited' =
|
Chris@10
|
63 fetchState >>= (fun (al, visited, _) ->
|
Chris@10
|
64 storeState (al, visited, visited'))
|
Chris@10
|
65 let lookupVisitedM' key =
|
Chris@10
|
66 fetchVisited' >>= fun table ->
|
Chris@10
|
67 returnM (node_lookup key table)
|
Chris@10
|
68 let insertVisitedM' key value =
|
Chris@10
|
69 fetchVisited' >>= fun table ->
|
Chris@10
|
70 storeVisited' (node_insert key value table)
|
Chris@10
|
71
|
Chris@10
|
72 let counting f x =
|
Chris@10
|
73 fetchVisited >>= (fun v ->
|
Chris@10
|
74 match node_lookup x v with
|
Chris@10
|
75 Some count ->
|
Chris@10
|
76 let incr_cnt =
|
Chris@10
|
77 fetchVisited >>= (fun v' ->
|
Chris@10
|
78 storeVisited (node_insert x (count + 1) v'))
|
Chris@10
|
79 in
|
Chris@10
|
80 begin
|
Chris@10
|
81 match x with
|
Chris@10
|
82 (* Uminus is always inlined. Visit child *)
|
Chris@10
|
83 Uminus y -> f y >> incr_cnt
|
Chris@10
|
84 | _ -> incr_cnt
|
Chris@10
|
85 end
|
Chris@10
|
86 | None ->
|
Chris@10
|
87 f x >> fetchVisited >>= (fun v' ->
|
Chris@10
|
88 storeVisited (node_insert x 1 v')))
|
Chris@10
|
89
|
Chris@10
|
90 let with_varM v x =
|
Chris@10
|
91 fetchAl >>= (fun al -> storeAl ((v, x) :: al)) >> returnM (Load v)
|
Chris@10
|
92
|
Chris@10
|
93 let inlineM = returnM
|
Chris@10
|
94
|
Chris@10
|
95 let with_tempM x = match x with
|
Chris@10
|
96 | Load v when Variable.is_temporary v -> inlineM x (* avoid trivial moves *)
|
Chris@10
|
97 | _ -> with_varM (fresh ()) x
|
Chris@10
|
98
|
Chris@10
|
99 (* declare a temporary only if node is used more than once *)
|
Chris@10
|
100 let with_temp_maybeM node x =
|
Chris@10
|
101 fetchVisited >>= (fun v ->
|
Chris@10
|
102 match node_lookup node v with
|
Chris@10
|
103 Some count ->
|
Chris@10
|
104 if (count = 1 && !Magic.inline_single) then
|
Chris@10
|
105 inlineM x
|
Chris@10
|
106 else
|
Chris@10
|
107 with_tempM x
|
Chris@10
|
108 | None ->
|
Chris@10
|
109 failwith "with_temp_maybeM")
|
Chris@10
|
110 type fma =
|
Chris@10
|
111 NO_FMA
|
Chris@10
|
112 | FMA of expr * expr * expr (* FMA (a, b, c) => a + b * c *)
|
Chris@10
|
113 | FMS of expr * expr * expr (* FMS (a, b, c) => -a + b * c *)
|
Chris@10
|
114 | FNMS of expr * expr * expr (* FNMS (a, b, c) => a - b * c *)
|
Chris@10
|
115
|
Chris@10
|
116 let good_for_fma (a, b) =
|
Chris@10
|
117 let good = function
|
Chris@10
|
118 | NaN I -> true
|
Chris@10
|
119 | NaN CONJ -> true
|
Chris@10
|
120 | NaN _ -> false
|
Chris@10
|
121 | Times(NaN _, _) -> false
|
Chris@10
|
122 | Times(_, NaN _) -> false
|
Chris@10
|
123 | _ -> true
|
Chris@10
|
124 in good a && good b
|
Chris@10
|
125
|
Chris@10
|
126 let build_fma l =
|
Chris@10
|
127 if (not !Magic.enable_fma) then NO_FMA
|
Chris@10
|
128 else match l with
|
Chris@10
|
129 | [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> FNMS (a, b, c)
|
Chris@10
|
130 | [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> FNMS (a, b, c)
|
Chris@10
|
131 | [Uminus a; Times (b, c)] when good_for_fma (b, c) -> FMS (a, b, c)
|
Chris@10
|
132 | [Times (b, c); Uminus a] when good_for_fma (b, c) -> FMS (a, b, c)
|
Chris@10
|
133 | [a; Times (b, c)] when good_for_fma (b, c) -> FMA (a, b, c)
|
Chris@10
|
134 | [Times (b, c); a] when good_for_fma (b, c) -> FMA (a, b, c)
|
Chris@10
|
135 | _ -> NO_FMA
|
Chris@10
|
136
|
Chris@10
|
137 let children_fma l = match build_fma l with
|
Chris@10
|
138 | FMA (a, b, c) -> Some (a, b, c)
|
Chris@10
|
139 | FMS (a, b, c) -> Some (a, b, c)
|
Chris@10
|
140 | FNMS (a, b, c) -> Some (a, b, c)
|
Chris@10
|
141 | NO_FMA -> None
|
Chris@10
|
142
|
Chris@10
|
143
|
Chris@10
|
144 let rec visitM x =
|
Chris@10
|
145 counting (function
|
Chris@10
|
146 | Load v -> returnM ()
|
Chris@10
|
147 | Num a -> returnM ()
|
Chris@10
|
148 | NaN a -> returnM ()
|
Chris@10
|
149 | Store (v, x) -> visitM x
|
Chris@10
|
150 | Plus a -> (match children_fma a with
|
Chris@10
|
151 None -> mapM visitM a >> returnM ()
|
Chris@10
|
152 | Some (a, b, c) ->
|
Chris@10
|
153 (* visit fma's arguments twice to make sure they are not inlined *)
|
Chris@10
|
154 visitM a >> visitM a >>
|
Chris@10
|
155 visitM b >> visitM b >>
|
Chris@10
|
156 visitM c >> visitM c)
|
Chris@10
|
157 | Times (a, b) -> visitM a >> visitM b
|
Chris@10
|
158 | CTimes (a, b) -> visitM a >> visitM b
|
Chris@10
|
159 | CTimesJ (a, b) -> visitM a >> visitM b
|
Chris@10
|
160 | Uminus a -> visitM a)
|
Chris@10
|
161 x
|
Chris@10
|
162
|
Chris@10
|
163 let visit_rootsM = mapM visitM
|
Chris@10
|
164
|
Chris@10
|
165
|
Chris@10
|
166 let rec expr_of_nodeM x =
|
Chris@10
|
167 memoizing lookupVisitedM' insertVisitedM'
|
Chris@10
|
168 (function x -> match x with
|
Chris@10
|
169 | Load v ->
|
Chris@10
|
170 if (Variable.is_temporary v) then
|
Chris@10
|
171 inlineM (Load v)
|
Chris@10
|
172 else if (Variable.is_locative v && !Magic.inline_loads) then
|
Chris@10
|
173 inlineM (Load v)
|
Chris@10
|
174 else if (Variable.is_constant v && !Magic.inline_loads_constants) then
|
Chris@10
|
175 inlineM (Load v)
|
Chris@10
|
176 else
|
Chris@10
|
177 with_tempM (Load v)
|
Chris@10
|
178 | Num a ->
|
Chris@10
|
179 if !Magic.inline_constants then
|
Chris@10
|
180 inlineM (Num a)
|
Chris@10
|
181 else
|
Chris@10
|
182 with_temp_maybeM x (Num a)
|
Chris@10
|
183 | NaN a -> inlineM (NaN a)
|
Chris@10
|
184 | Store (v, x) ->
|
Chris@10
|
185 expr_of_nodeM x >>=
|
Chris@10
|
186 (if !Magic.trivial_stores then with_tempM else inlineM) >>=
|
Chris@10
|
187 with_varM v
|
Chris@10
|
188
|
Chris@10
|
189 | Plus a ->
|
Chris@10
|
190 begin
|
Chris@10
|
191 match build_fma a with
|
Chris@10
|
192 FMA (a, b, c) ->
|
Chris@10
|
193 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
194 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
195 expr_of_nodeM c >>= fun c' ->
|
Chris@10
|
196 with_temp_maybeM x (Plus [a'; Times (b', c')])
|
Chris@10
|
197 | FMS (a, b, c) ->
|
Chris@10
|
198 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
199 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
200 expr_of_nodeM c >>= fun c' ->
|
Chris@10
|
201 with_temp_maybeM x
|
Chris@10
|
202 (Plus [Times (b', c'); Uminus a'])
|
Chris@10
|
203 | FNMS (a, b, c) ->
|
Chris@10
|
204 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
205 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
206 expr_of_nodeM c >>= fun c' ->
|
Chris@10
|
207 with_temp_maybeM x
|
Chris@10
|
208 (Plus [a'; Uminus (Times (b', c'))])
|
Chris@10
|
209 | NO_FMA ->
|
Chris@10
|
210 mapM expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
211 with_temp_maybeM x (Plus a')
|
Chris@10
|
212 end
|
Chris@10
|
213 | CTimes (Load _ as a, b) when !Magic.generate_bytw ->
|
Chris@10
|
214 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
215 with_tempM (CTimes (a, b'))
|
Chris@10
|
216 | CTimes (a, b) ->
|
Chris@10
|
217 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
218 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
219 with_tempM (CTimes (a', b'))
|
Chris@10
|
220 | CTimesJ (Load _ as a, b) when !Magic.generate_bytw ->
|
Chris@10
|
221 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
222 with_tempM (CTimesJ (a, b'))
|
Chris@10
|
223 | CTimesJ (a, b) ->
|
Chris@10
|
224 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
225 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
226 with_tempM (CTimesJ (a', b'))
|
Chris@10
|
227 | Times (a, b) ->
|
Chris@10
|
228 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
229 expr_of_nodeM b >>= fun b' ->
|
Chris@10
|
230 begin
|
Chris@10
|
231 match a' with
|
Chris@10
|
232 Num a'' when !Magic.strength_reduce_mul && Number.is_two a'' ->
|
Chris@10
|
233 (inlineM b' >>= fun b'' ->
|
Chris@10
|
234 with_temp_maybeM x (Plus [b''; b'']))
|
Chris@10
|
235 | _ -> with_temp_maybeM x (Times (a', b'))
|
Chris@10
|
236 end
|
Chris@10
|
237 | Uminus a ->
|
Chris@10
|
238 expr_of_nodeM a >>= fun a' ->
|
Chris@10
|
239 inlineM (Uminus a'))
|
Chris@10
|
240 x
|
Chris@10
|
241
|
Chris@10
|
242 let expr_of_rootsM = mapM expr_of_nodeM
|
Chris@10
|
243
|
Chris@10
|
244 let peek_alistM roots =
|
Chris@10
|
245 visit_rootsM roots >> expr_of_rootsM roots >> fetchAl
|
Chris@10
|
246
|
Chris@10
|
247 let wrap_assign (a, b) = Expr.Assign (a, b)
|
Chris@10
|
248
|
Chris@10
|
249 let to_assignments dag =
|
Chris@10
|
250 let () = Util.info "begin to_alist" in
|
Chris@10
|
251 let al = List.rev (runM ([], empty, empty) peek_alistM dag) in
|
Chris@10
|
252 let res = List.map wrap_assign al in
|
Chris@10
|
253 let () = Util.info "end to_alist" in
|
Chris@10
|
254 res
|
Chris@10
|
255
|
Chris@10
|
256
|
Chris@10
|
257 (* dump alist in `dot' format *)
|
Chris@10
|
258 let dump print alist =
|
Chris@10
|
259 let vs v = "\"" ^ (Variable.unparse v) ^ "\"" in
|
Chris@10
|
260 begin
|
Chris@10
|
261 print "digraph G {\n";
|
Chris@10
|
262 print "\tsize=\"6,6\";\n";
|
Chris@10
|
263
|
Chris@10
|
264 (* all input nodes have the same rank *)
|
Chris@10
|
265 print "{ rank = same;\n";
|
Chris@10
|
266 List.iter (fun (Expr.Assign (v, x)) ->
|
Chris@10
|
267 List.iter (fun y ->
|
Chris@10
|
268 if (Variable.is_locative y) then print("\t" ^ (vs y) ^ ";\n"))
|
Chris@10
|
269 (Expr.find_vars x))
|
Chris@10
|
270 alist;
|
Chris@10
|
271 print "}\n";
|
Chris@10
|
272
|
Chris@10
|
273 (* all output nodes have the same rank *)
|
Chris@10
|
274 print "{ rank = same;\n";
|
Chris@10
|
275 List.iter (fun (Expr.Assign (v, x)) ->
|
Chris@10
|
276 if (Variable.is_locative v) then print("\t" ^ (vs v) ^ ";\n"))
|
Chris@10
|
277 alist;
|
Chris@10
|
278 print "}\n";
|
Chris@10
|
279
|
Chris@10
|
280 (* edges *)
|
Chris@10
|
281 List.iter (fun (Expr.Assign (v, x)) ->
|
Chris@10
|
282 List.iter (fun y -> print("\t" ^ (vs y) ^ " -> " ^ (vs v) ^ ";\n"))
|
Chris@10
|
283 (Expr.find_vars x))
|
Chris@10
|
284 alist;
|
Chris@10
|
285
|
Chris@10
|
286 print "}\n";
|
Chris@10
|
287 end
|
Chris@10
|
288
|