Chris@10
|
1 (*
|
Chris@10
|
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
Chris@10
|
3 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
Chris@10
|
4 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
Chris@10
|
5 *
|
Chris@10
|
6 * This program is free software; you can redistribute it and/or modify
|
Chris@10
|
7 * it under the terms of the GNU General Public License as published by
|
Chris@10
|
8 * the Free Software Foundation; either version 2 of the License, or
|
Chris@10
|
9 * (at your option) any later version.
|
Chris@10
|
10 *
|
Chris@10
|
11 * This program is distributed in the hope that it will be useful,
|
Chris@10
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@10
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@10
|
14 * GNU General Public License for more details.
|
Chris@10
|
15 *
|
Chris@10
|
16 * You should have received a copy of the GNU General Public License
|
Chris@10
|
17 * along with this program; if not, write to the Free Software
|
Chris@10
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@10
|
19 *
|
Chris@10
|
20 *)
|
Chris@10
|
21
|
Chris@10
|
22 (* Here, we take a schedule (produced by schedule.ml) ordering a
|
Chris@10
|
23 sequence of instructions, and produce an annotated schedule. The
|
Chris@10
|
24 annotated schedule has the same ordering as the original schedule,
|
Chris@10
|
25 but is additionally partitioned into nested blocks of temporary
|
Chris@10
|
26 variables. The partitioning is computed via a heuristic algorithm.
|
Chris@10
|
27
|
Chris@10
|
28 The blocking allows the C code that we generate to consist of
|
Chris@10
|
29 nested blocks that help communicate variable lifetimes to the
|
Chris@10
|
30 compiler. *)
|
Chris@10
|
31
|
Chris@10
|
32 open Schedule
|
Chris@10
|
33 open Expr
|
Chris@10
|
34 open Variable
|
Chris@10
|
35
|
Chris@10
|
36 type annotated_schedule =
|
Chris@10
|
37 Annotate of variable list * variable list * variable list * int * aschedule
|
Chris@10
|
38 and aschedule =
|
Chris@10
|
39 ADone
|
Chris@10
|
40 | AInstr of assignment
|
Chris@10
|
41 | ASeq of (annotated_schedule * annotated_schedule)
|
Chris@10
|
42
|
Chris@10
|
43 let addelem a set = if not (List.memq a set) then a :: set else set
|
Chris@10
|
44 let union l =
|
Chris@10
|
45 let f x = addelem x (* let is source of polymorphism *)
|
Chris@10
|
46 in List.fold_right f l
|
Chris@10
|
47
|
Chris@10
|
48 (* set difference a - b *)
|
Chris@10
|
49 let diff a b = List.filter (fun x -> not (List.memq x b)) a
|
Chris@10
|
50
|
Chris@10
|
51 let rec minimize f = function
|
Chris@10
|
52 [] -> failwith "minimize"
|
Chris@10
|
53 | [n] -> n
|
Chris@10
|
54 | n :: rest ->
|
Chris@10
|
55 let x = minimize f rest in
|
Chris@10
|
56 if (f x) >= (f n) then n else x
|
Chris@10
|
57
|
Chris@10
|
58 (* find all variables used inside a scheduling unit *)
|
Chris@10
|
59 let rec find_block_vars = function
|
Chris@10
|
60 Done -> []
|
Chris@10
|
61 | (Instr (Assign (v, x))) -> v :: (find_vars x)
|
Chris@10
|
62 | Par a -> List.flatten (List.map find_block_vars a)
|
Chris@10
|
63 | Seq (a, b) -> (find_block_vars a) @ (find_block_vars b)
|
Chris@10
|
64
|
Chris@10
|
65 let uniq l =
|
Chris@10
|
66 List.fold_right (fun a b -> if List.memq a b then b else a :: b) l []
|
Chris@10
|
67
|
Chris@10
|
68 let has_related x = List.exists (Variable.same_class x)
|
Chris@10
|
69
|
Chris@10
|
70 let rec overlap a b = Util.count (fun y -> has_related y b) a
|
Chris@10
|
71
|
Chris@10
|
72 (* reorder a list of schedules so as to maximize overlap of variables *)
|
Chris@10
|
73 let reorder l =
|
Chris@10
|
74 let rec loop = function
|
Chris@10
|
75 [] -> []
|
Chris@10
|
76 | (a, va) :: b ->
|
Chris@10
|
77 let c =
|
Chris@10
|
78 List.map
|
Chris@10
|
79 (fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in
|
Chris@10
|
80 let c' =
|
Chris@10
|
81 Sort.list
|
Chris@10
|
82 (fun (_, (a, la)) (_, (b, lb)) ->
|
Chris@10
|
83 la < lb or a > b)
|
Chris@10
|
84 c in
|
Chris@10
|
85 let b' = List.map (fun (a, _) -> a) c' in
|
Chris@10
|
86 a :: (loop b') in
|
Chris@10
|
87 let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in
|
Chris@10
|
88 (* start with smallest block --- does this matter ? *)
|
Chris@10
|
89 match l' with
|
Chris@10
|
90 [] -> []
|
Chris@10
|
91 | _ ->
|
Chris@10
|
92 let m = minimize (fun (_, x) -> (List.length x)) l' in
|
Chris@10
|
93 let l'' = Util.remove m l' in
|
Chris@10
|
94 loop (m :: l'')
|
Chris@10
|
95
|
Chris@10
|
96 (* remove Par blocks *)
|
Chris@10
|
97 let rec linearize = function
|
Chris@10
|
98 | Seq (a, Done) -> linearize a
|
Chris@10
|
99 | Seq (Done, a) -> linearize a
|
Chris@10
|
100 | Seq (a, b) -> Seq (linearize a, linearize b)
|
Chris@10
|
101
|
Chris@10
|
102 (* try to balance nested Par blocks *)
|
Chris@10
|
103 | Par [a] -> linearize a
|
Chris@10
|
104 | Par l ->
|
Chris@10
|
105 let n2 = (List.length l) / 2 in
|
Chris@10
|
106 let rec loop n a b =
|
Chris@10
|
107 if n = 0 then
|
Chris@10
|
108 (List.rev b, a)
|
Chris@10
|
109 else
|
Chris@10
|
110 match a with
|
Chris@10
|
111 [] -> failwith "loop"
|
Chris@10
|
112 | x :: y -> loop (n - 1) y (x :: b)
|
Chris@10
|
113 in let (a, b) = loop n2 (reorder l) []
|
Chris@10
|
114 in linearize (Seq (Par a, Par b))
|
Chris@10
|
115
|
Chris@10
|
116 | x -> x
|
Chris@10
|
117
|
Chris@10
|
118 let subset a b =
|
Chris@10
|
119 List.for_all (fun x -> List.exists (fun y -> x == y) b) a
|
Chris@10
|
120
|
Chris@10
|
121 let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) =
|
Chris@10
|
122 is_temporary av &&
|
Chris@10
|
123 is_temporary bv &&
|
Chris@10
|
124 (let va = Expr.find_vars ax and vb = Expr.find_vars bx in
|
Chris@10
|
125 subset va vb && subset vb va)
|
Chris@10
|
126
|
Chris@10
|
127 let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) =
|
Chris@10
|
128 is_locative av &&
|
Chris@10
|
129 is_locative bv &&
|
Chris@10
|
130 Variable.same_class av bv
|
Chris@10
|
131
|
Chris@10
|
132 let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) =
|
Chris@10
|
133 match (ax, bx) with
|
Chris@10
|
134 | (Load a), (Load b) when
|
Chris@10
|
135 Variable.is_locative a && Variable.is_locative b
|
Chris@10
|
136 -> Variable.same_class a b
|
Chris@10
|
137 | _ -> false
|
Chris@10
|
138
|
Chris@10
|
139 (* extract instructions from schedule *)
|
Chris@10
|
140 let rec sched_to_ilist = function
|
Chris@10
|
141 | Done -> []
|
Chris@10
|
142 | Instr a -> [a]
|
Chris@10
|
143 | Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b)
|
Chris@10
|
144 | _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *)
|
Chris@10
|
145
|
Chris@10
|
146 let rec find_friends friendp insn friends foes = function
|
Chris@10
|
147 | [] -> (friends, foes)
|
Chris@10
|
148 | a :: b ->
|
Chris@10
|
149 if (a == insn) || (friendp a insn) then
|
Chris@10
|
150 find_friends friendp insn (a :: friends) foes b
|
Chris@10
|
151 else
|
Chris@10
|
152 find_friends friendp insn friends (a :: foes) b
|
Chris@10
|
153
|
Chris@10
|
154 (* schedule all instructions in the equivalence class determined
|
Chris@10
|
155 by friendp at the point where the last one
|
Chris@10
|
156 is executed *)
|
Chris@10
|
157 let rec delay_friends friendp sched =
|
Chris@10
|
158 let rec recur insns = function
|
Chris@10
|
159 | Done -> (Done, insns)
|
Chris@10
|
160 | Instr a ->
|
Chris@10
|
161 let (friends, foes) = find_friends friendp a [] [] insns in
|
Chris@10
|
162 (Schedule.sequentially friends), foes
|
Chris@10
|
163 | Seq (a, b) ->
|
Chris@10
|
164 let (b', insnsb) = recur insns b in
|
Chris@10
|
165 let (a', insnsa) = recur insnsb a in
|
Chris@10
|
166 (Seq (a', b')), insnsa
|
Chris@10
|
167 | _ -> failwith "delay_friends"
|
Chris@10
|
168 in match recur (sched_to_ilist sched) sched with
|
Chris@10
|
169 | (s, []) -> s (* assert that all insns have been used *)
|
Chris@10
|
170 | _ -> failwith "delay_friends"
|
Chris@10
|
171
|
Chris@10
|
172 (* schedule all instructions in the equivalence class determined
|
Chris@10
|
173 by friendp at the point where the first one
|
Chris@10
|
174 is executed *)
|
Chris@10
|
175 let rec anticipate_friends friendp sched =
|
Chris@10
|
176 let rec recur insns = function
|
Chris@10
|
177 | Done -> (Done, insns)
|
Chris@10
|
178 | Instr a ->
|
Chris@10
|
179 let (friends, foes) = find_friends friendp a [] [] insns in
|
Chris@10
|
180 (Schedule.sequentially friends), foes
|
Chris@10
|
181 | Seq (a, b) ->
|
Chris@10
|
182 let (a', insnsa) = recur insns a in
|
Chris@10
|
183 let (b', insnsb) = recur insnsa b in
|
Chris@10
|
184 (Seq (a', b')), insnsb
|
Chris@10
|
185 | _ -> failwith "anticipate_friends"
|
Chris@10
|
186 in match recur (sched_to_ilist sched) sched with
|
Chris@10
|
187 | (s, []) -> s (* assert that all insns have been used *)
|
Chris@10
|
188 | _ -> failwith "anticipate_friends"
|
Chris@10
|
189
|
Chris@10
|
190 let collect_buddy_stores buddy_list sched =
|
Chris@10
|
191 let rec recur sched delayed_stores = match sched with
|
Chris@10
|
192 | Done -> (sched, delayed_stores)
|
Chris@10
|
193 | Instr (Assign (v, x)) ->
|
Chris@10
|
194 begin
|
Chris@10
|
195 try
|
Chris@10
|
196 let buddies = List.find (List.memq v) buddy_list in
|
Chris@10
|
197 let tmp = Variable.make_temporary () in
|
Chris@10
|
198 let i = Seq(Instr (Assign (tmp, x)),
|
Chris@10
|
199 Instr (Assign (v, Times (NaN MULTI_A, Load tmp))))
|
Chris@10
|
200 and delayed_stores = (v, Load tmp) :: delayed_stores in
|
Chris@10
|
201 try
|
Chris@10
|
202 (Seq (i,
|
Chris@10
|
203 Instr (Assign
|
Chris@10
|
204 (List.hd buddies,
|
Chris@10
|
205 Times (NaN MULTI_B,
|
Chris@10
|
206 Plus (List.map
|
Chris@10
|
207 (fun buddy ->
|
Chris@10
|
208 List.assq buddy
|
Chris@10
|
209 delayed_stores)
|
Chris@10
|
210 buddies))) )))
|
Chris@10
|
211 , delayed_stores
|
Chris@10
|
212 with Not_found -> (i, delayed_stores)
|
Chris@10
|
213 with Not_found -> (sched, delayed_stores)
|
Chris@10
|
214 end
|
Chris@10
|
215 | Seq (a, b) ->
|
Chris@10
|
216 let (newa, delayed_stores) = recur a delayed_stores in
|
Chris@10
|
217 let (newb, delayed_stores) = recur b delayed_stores in
|
Chris@10
|
218 (Seq (newa, newb), delayed_stores)
|
Chris@10
|
219 | _ -> failwith "collect_buddy_stores"
|
Chris@10
|
220 in let (sched, _) = recur sched [] in
|
Chris@10
|
221 sched
|
Chris@10
|
222
|
Chris@10
|
223 let schedule_for_pipeline sched =
|
Chris@10
|
224 let update_readytimes t (Assign (v, _)) ready_times =
|
Chris@10
|
225 (v, (t + !Magic.pipeline_latency)) :: ready_times
|
Chris@10
|
226 and readyp t ready_times (Assign (_, x)) =
|
Chris@10
|
227 List.for_all
|
Chris@10
|
228 (fun var ->
|
Chris@10
|
229 try
|
Chris@10
|
230 (List.assq var ready_times) <= t
|
Chris@10
|
231 with Not_found -> false)
|
Chris@10
|
232 (List.filter Variable.is_temporary (Expr.find_vars x))
|
Chris@10
|
233 in
|
Chris@10
|
234 let rec recur sched t ready_times delayed_instructions =
|
Chris@10
|
235 let (ready, not_ready) =
|
Chris@10
|
236 List.partition (readyp t ready_times) delayed_instructions
|
Chris@10
|
237 in match ready with
|
Chris@10
|
238 | a :: b ->
|
Chris@10
|
239 let (sched, t, ready_times, delayed_instructions) =
|
Chris@10
|
240 recur sched (t+1) (update_readytimes t a ready_times)
|
Chris@10
|
241 (b @ not_ready)
|
Chris@10
|
242 in
|
Chris@10
|
243 (Seq (Instr a, sched)), t, ready_times, delayed_instructions
|
Chris@10
|
244 | _ -> (match sched with
|
Chris@10
|
245 | Done -> (sched, t, ready_times, delayed_instructions)
|
Chris@10
|
246 | Instr a ->
|
Chris@10
|
247 if (readyp t ready_times a) then
|
Chris@10
|
248 (sched, (t+1), (update_readytimes t a ready_times),
|
Chris@10
|
249 delayed_instructions)
|
Chris@10
|
250 else
|
Chris@10
|
251 (Done, t, ready_times, (a :: delayed_instructions))
|
Chris@10
|
252 | Seq (a, b) ->
|
Chris@10
|
253 let (a, t, ready_times, delayed_instructions) =
|
Chris@10
|
254 recur a t ready_times delayed_instructions
|
Chris@10
|
255 in
|
Chris@10
|
256 let (b, t, ready_times, delayed_instructions) =
|
Chris@10
|
257 recur b t ready_times delayed_instructions
|
Chris@10
|
258 in (Seq (a, b)), t, ready_times, delayed_instructions
|
Chris@10
|
259 | _ -> failwith "schedule_for_pipeline")
|
Chris@10
|
260 in let rec recur_until_done sched t ready_times delayed_instructions =
|
Chris@10
|
261 let (sched, t, ready_times, delayed_instructions) =
|
Chris@10
|
262 recur sched t ready_times delayed_instructions
|
Chris@10
|
263 in match delayed_instructions with
|
Chris@10
|
264 | [] -> sched
|
Chris@10
|
265 | _ ->
|
Chris@10
|
266 (Seq (sched,
|
Chris@10
|
267 (recur_until_done Done (t+1) ready_times
|
Chris@10
|
268 delayed_instructions)))
|
Chris@10
|
269 in recur_until_done sched 0 [] []
|
Chris@10
|
270
|
Chris@10
|
271 let rec rewrite_declarations force_declarations
|
Chris@10
|
272 (Annotate (_, _, declared, _, what)) =
|
Chris@10
|
273 let m = !Magic.number_of_variables in
|
Chris@10
|
274
|
Chris@10
|
275 let declare_it declared =
|
Chris@10
|
276 if (force_declarations or List.length declared >= m) then
|
Chris@10
|
277 ([], declared)
|
Chris@10
|
278 else
|
Chris@10
|
279 (declared, [])
|
Chris@10
|
280
|
Chris@10
|
281 in match what with
|
Chris@10
|
282 ADone -> Annotate ([], [], [], 0, what)
|
Chris@10
|
283 | AInstr i ->
|
Chris@10
|
284 let (u, d) = declare_it declared
|
Chris@10
|
285 in Annotate ([], u, d, 0, what)
|
Chris@10
|
286 | ASeq (a, b) ->
|
Chris@10
|
287 let ma = rewrite_declarations false a
|
Chris@10
|
288 and mb = rewrite_declarations false b
|
Chris@10
|
289 in let Annotate (_, ua, _, _, _) = ma
|
Chris@10
|
290 and Annotate (_, ub, _, _, _) = mb
|
Chris@10
|
291 in let (u, d) = declare_it (declared @ ua @ ub)
|
Chris@10
|
292 in Annotate ([], u, d, 0, ASeq (ma, mb))
|
Chris@10
|
293
|
Chris@10
|
294 let annotate list_of_buddy_stores schedule =
|
Chris@10
|
295 let rec analyze live_at_end = function
|
Chris@10
|
296 Done -> Annotate (live_at_end, [], [], 0, ADone)
|
Chris@10
|
297 | Instr i -> (match i with
|
Chris@10
|
298 Assign (v, x) ->
|
Chris@10
|
299 let vars = (find_vars x) in
|
Chris@10
|
300 Annotate (Util.remove v (union live_at_end vars), [v], [],
|
Chris@10
|
301 0, AInstr i))
|
Chris@10
|
302 | Seq (a, b) ->
|
Chris@10
|
303 let ab = analyze live_at_end b in
|
Chris@10
|
304 let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in
|
Chris@10
|
305 let aa = analyze live_at_begin_b a in
|
Chris@10
|
306 let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in
|
Chris@10
|
307 let defined = List.filter is_temporary (defined_a @ defined_b) in
|
Chris@10
|
308 let declarable = diff defined live_at_end in
|
Chris@10
|
309 let undeclarable = diff defined declarable
|
Chris@10
|
310 and maxdepth = max depth_a depth_b in
|
Chris@10
|
311 Annotate (live_at_begin_a, undeclarable, declarable,
|
Chris@10
|
312 List.length declarable + maxdepth,
|
Chris@10
|
313 ASeq (aa, ab))
|
Chris@10
|
314 | _ -> failwith "really_analyze"
|
Chris@10
|
315
|
Chris@10
|
316 in
|
Chris@10
|
317 let () = Util.info "begin annotate" in
|
Chris@10
|
318 let x = linearize schedule in
|
Chris@10
|
319
|
Chris@10
|
320 let x =
|
Chris@10
|
321 if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then
|
Chris@10
|
322 schedule_for_pipeline x
|
Chris@10
|
323 else
|
Chris@10
|
324 x
|
Chris@10
|
325 in
|
Chris@10
|
326
|
Chris@10
|
327 let x =
|
Chris@10
|
328 if !Magic.reorder_insns then
|
Chris@10
|
329 linearize(anticipate_friends use_same_vars x)
|
Chris@10
|
330 else
|
Chris@10
|
331 x
|
Chris@10
|
332 in
|
Chris@10
|
333
|
Chris@10
|
334 (* delay stores to the real and imaginary parts of the same number *)
|
Chris@10
|
335 let x =
|
Chris@10
|
336 if !Magic.reorder_stores then
|
Chris@10
|
337 linearize(delay_friends store_to_same_class x)
|
Chris@10
|
338 else
|
Chris@10
|
339 x
|
Chris@10
|
340 in
|
Chris@10
|
341
|
Chris@10
|
342 (* move loads of the real and imaginary parts of the same number *)
|
Chris@10
|
343 let x =
|
Chris@10
|
344 if !Magic.reorder_loads then
|
Chris@10
|
345 linearize(anticipate_friends loads_from_same_class x)
|
Chris@10
|
346 else
|
Chris@10
|
347 x
|
Chris@10
|
348 in
|
Chris@10
|
349
|
Chris@10
|
350 let x = collect_buddy_stores list_of_buddy_stores x in
|
Chris@10
|
351 let x = analyze [] x in
|
Chris@10
|
352 let res = rewrite_declarations true x in
|
Chris@10
|
353 let () = Util.info "end annotate" in
|
Chris@10
|
354 res
|
Chris@10
|
355
|
Chris@10
|
356 let rec dump print (Annotate (_, _, _, _, code)) =
|
Chris@10
|
357 dump_code print code
|
Chris@10
|
358 and dump_code print = function
|
Chris@10
|
359 | ADone -> ()
|
Chris@10
|
360 | AInstr x -> print ((assignment_to_string x) ^ "\n")
|
Chris@10
|
361 | ASeq (a, b) -> dump print a; dump print b
|