| Chris@10 | 1 (* | 
| Chris@10 | 2  * Copyright (c) 1997-1999 Massachusetts Institute of Technology | 
| Chris@10 | 3  * Copyright (c) 2003, 2007-11 Matteo Frigo | 
| Chris@10 | 4  * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | 
| Chris@10 | 5  * | 
| Chris@10 | 6  * This program is free software; you can redistribute it and/or modify | 
| Chris@10 | 7  * it under the terms of the GNU General Public License as published by | 
| Chris@10 | 8  * the Free Software Foundation; either version 2 of the License, or | 
| Chris@10 | 9  * (at your option) any later version. | 
| Chris@10 | 10  * | 
| Chris@10 | 11  * This program is distributed in the hope that it will be useful, | 
| Chris@10 | 12  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| Chris@10 | 13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| Chris@10 | 14  * GNU General Public License for more details. | 
| Chris@10 | 15  * | 
| Chris@10 | 16  * You should have received a copy of the GNU General Public License | 
| Chris@10 | 17  * along with this program; if not, write to the Free Software | 
| Chris@10 | 18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA | 
| Chris@10 | 19  * | 
| Chris@10 | 20  *) | 
| Chris@10 | 21 | 
| Chris@10 | 22 (* Here, we take a schedule (produced by schedule.ml) ordering a | 
| Chris@10 | 23    sequence of instructions, and produce an annotated schedule.  The | 
| Chris@10 | 24    annotated schedule has the same ordering as the original schedule, | 
| Chris@10 | 25    but is additionally partitioned into nested blocks of temporary | 
| Chris@10 | 26    variables.  The partitioning is computed via a heuristic algorithm. | 
| Chris@10 | 27 | 
| Chris@10 | 28    The blocking allows the C code that we generate to consist of | 
| Chris@10 | 29    nested blocks that help communicate variable lifetimes to the | 
| Chris@10 | 30    compiler. *) | 
| Chris@10 | 31 | 
| Chris@10 | 32 open Schedule | 
| Chris@10 | 33 open Expr | 
| Chris@10 | 34 open Variable | 
| Chris@10 | 35 | 
| Chris@10 | 36 type annotated_schedule = | 
| Chris@10 | 37     Annotate of variable list * variable list * variable list * int * aschedule | 
| Chris@10 | 38 and aschedule = | 
| Chris@10 | 39     ADone | 
| Chris@10 | 40   | AInstr of assignment | 
| Chris@10 | 41   | ASeq of (annotated_schedule * annotated_schedule) | 
| Chris@10 | 42 | 
| Chris@10 | 43 let addelem a set = if not (List.memq a set) then a :: set else set | 
| Chris@10 | 44 let union l = | 
| Chris@10 | 45   let f x = addelem x   (* let is source of polymorphism *) | 
| Chris@10 | 46   in List.fold_right f l | 
| Chris@10 | 47 | 
| Chris@10 | 48 (* set difference a - b *) | 
| Chris@10 | 49 let diff a b = List.filter (fun x -> not (List.memq x b)) a | 
| Chris@10 | 50 | 
| Chris@10 | 51 let rec minimize f = function | 
| Chris@10 | 52     [] -> failwith "minimize" | 
| Chris@10 | 53   | [n] -> n | 
| Chris@10 | 54   | n :: rest -> | 
| Chris@10 | 55       let x = minimize f rest in | 
| Chris@10 | 56       if (f x) >= (f n) then n else x | 
| Chris@10 | 57 | 
| Chris@10 | 58 (* find all variables used inside a scheduling unit *) | 
| Chris@10 | 59 let rec find_block_vars = function | 
| Chris@10 | 60     Done -> [] | 
| Chris@10 | 61   | (Instr (Assign (v, x))) -> v :: (find_vars x) | 
| Chris@10 | 62   | Par a -> List.flatten (List.map find_block_vars a) | 
| Chris@10 | 63   | Seq (a, b) -> (find_block_vars a) @ (find_block_vars b) | 
| Chris@10 | 64 | 
| Chris@10 | 65 let uniq l = | 
| Chris@10 | 66   List.fold_right (fun a b -> if List.memq a b then b else a :: b) l [] | 
| Chris@10 | 67 | 
| Chris@10 | 68 let has_related x = List.exists (Variable.same_class x) | 
| Chris@10 | 69 | 
| Chris@10 | 70 let rec overlap a b = Util.count (fun y -> has_related y b) a | 
| Chris@10 | 71 | 
| Chris@10 | 72 (* reorder a list of schedules so as to maximize overlap of variables *) | 
| Chris@10 | 73 let reorder l = | 
| Chris@10 | 74   let rec loop = function | 
| Chris@10 | 75       [] -> [] | 
| Chris@10 | 76     | (a, va) :: b -> | 
| Chris@10 | 77 	let c = | 
| Chris@10 | 78 	  List.map | 
| Chris@10 | 79 	    (fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in | 
| Chris@10 | 80 	let c' = | 
| Chris@10 | 81 	  Sort.list | 
| Chris@10 | 82 	    (fun (_, (a, la)) (_, (b, lb)) -> | 
| Chris@10 | 83 	      la < lb or a > b) | 
| Chris@10 | 84 	    c in | 
| Chris@10 | 85 	let b' = List.map (fun (a, _) -> a) c' in | 
| Chris@10 | 86 	a :: (loop b') in | 
| Chris@10 | 87   let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in | 
| Chris@10 | 88   (* start with smallest block --- does this matter ? *) | 
| Chris@10 | 89   match l' with | 
| Chris@10 | 90     [] -> [] | 
| Chris@10 | 91   | _ -> | 
| Chris@10 | 92       let m = minimize (fun (_, x) -> (List.length x)) l' in | 
| Chris@10 | 93       let l'' = Util.remove m l' in | 
| Chris@10 | 94       loop (m :: l'') | 
| Chris@10 | 95 | 
| Chris@10 | 96 (* remove Par blocks *) | 
| Chris@10 | 97 let rec linearize = function | 
| Chris@10 | 98   | Seq (a, Done) -> linearize a | 
| Chris@10 | 99   | Seq (Done, a) -> linearize a | 
| Chris@10 | 100   | Seq (a, b) -> Seq (linearize a, linearize b) | 
| Chris@10 | 101 | 
| Chris@10 | 102   (* try to balance nested Par blocks *) | 
| Chris@10 | 103   | Par [a] -> linearize a | 
| Chris@10 | 104   | Par l -> | 
| Chris@10 | 105       let n2 = (List.length l) / 2 in | 
| Chris@10 | 106       let rec loop n a b = | 
| Chris@10 | 107 	if n = 0 then | 
| Chris@10 | 108 	  (List.rev b, a) | 
| Chris@10 | 109 	else | 
| Chris@10 | 110 	  match a with | 
| Chris@10 | 111 	    [] -> failwith "loop" | 
| Chris@10 | 112 	  | x :: y -> loop (n - 1) y (x :: b) | 
| Chris@10 | 113       in let (a, b) = loop n2 (reorder l) [] | 
| Chris@10 | 114       in linearize (Seq (Par a, Par b)) | 
| Chris@10 | 115 | 
| Chris@10 | 116   | x -> x | 
| Chris@10 | 117 | 
| Chris@10 | 118 let subset a b = | 
| Chris@10 | 119   List.for_all (fun x -> List.exists (fun y -> x == y) b) a | 
| Chris@10 | 120 | 
| Chris@10 | 121 let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) = | 
| Chris@10 | 122   is_temporary av && | 
| Chris@10 | 123   is_temporary bv && | 
| Chris@10 | 124   (let va = Expr.find_vars ax and vb = Expr.find_vars bx in | 
| Chris@10 | 125    subset va vb && subset vb va) | 
| Chris@10 | 126 | 
| Chris@10 | 127 let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) = | 
| Chris@10 | 128   is_locative av && | 
| Chris@10 | 129   is_locative bv && | 
| Chris@10 | 130   Variable.same_class av bv | 
| Chris@10 | 131 | 
| Chris@10 | 132 let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) = | 
| Chris@10 | 133   match (ax, bx) with | 
| Chris@10 | 134     | (Load a), (Load b) when | 
| Chris@10 | 135 	Variable.is_locative a && Variable.is_locative b | 
| Chris@10 | 136 	-> Variable.same_class a b | 
| Chris@10 | 137     | _ -> false | 
| Chris@10 | 138 | 
| Chris@10 | 139 (* extract instructions from schedule *) | 
| Chris@10 | 140 let rec sched_to_ilist = function | 
| Chris@10 | 141   | Done -> [] | 
| Chris@10 | 142   | Instr a -> [a] | 
| Chris@10 | 143   | Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b) | 
| Chris@10 | 144   | _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *) | 
| Chris@10 | 145 | 
| Chris@10 | 146 let rec find_friends friendp insn friends foes = function | 
| Chris@10 | 147   | [] -> (friends, foes) | 
| Chris@10 | 148   | a :: b -> | 
| Chris@10 | 149       if (a == insn) || (friendp a insn) then | 
| Chris@10 | 150 	find_friends friendp insn (a :: friends) foes b | 
| Chris@10 | 151       else | 
| Chris@10 | 152 	find_friends friendp insn friends (a :: foes) b | 
| Chris@10 | 153 | 
| Chris@10 | 154 (* schedule all instructions in the equivalence class determined | 
| Chris@10 | 155    by friendp at the point where the last one | 
| Chris@10 | 156    is executed *) | 
| Chris@10 | 157 let rec delay_friends friendp sched = | 
| Chris@10 | 158   let rec recur insns = function | 
| Chris@10 | 159     | Done -> (Done, insns) | 
| Chris@10 | 160     | Instr a -> | 
| Chris@10 | 161 	let (friends, foes) = find_friends friendp a [] [] insns in | 
| Chris@10 | 162 	(Schedule.sequentially friends), foes | 
| Chris@10 | 163     | Seq (a, b) -> | 
| Chris@10 | 164 	let (b', insnsb) = recur insns b in | 
| Chris@10 | 165 	let (a', insnsa) = recur insnsb a in | 
| Chris@10 | 166 	(Seq (a', b')), insnsa | 
| Chris@10 | 167     | _ -> failwith "delay_friends" | 
| Chris@10 | 168   in match recur (sched_to_ilist sched) sched with | 
| Chris@10 | 169   | (s, []) -> s (* assert that all insns have been used *) | 
| Chris@10 | 170   | _ -> failwith "delay_friends" | 
| Chris@10 | 171 | 
| Chris@10 | 172 (* schedule all instructions in the equivalence class determined | 
| Chris@10 | 173    by friendp at the point where the first one | 
| Chris@10 | 174    is executed *) | 
| Chris@10 | 175 let rec anticipate_friends friendp sched = | 
| Chris@10 | 176   let rec recur insns = function | 
| Chris@10 | 177     | Done -> (Done, insns) | 
| Chris@10 | 178     | Instr a -> | 
| Chris@10 | 179 	let (friends, foes) = find_friends friendp a [] [] insns in | 
| Chris@10 | 180 	(Schedule.sequentially friends), foes | 
| Chris@10 | 181     | Seq (a, b) -> | 
| Chris@10 | 182 	let (a', insnsa) = recur insns a in | 
| Chris@10 | 183 	let (b', insnsb) = recur insnsa b in | 
| Chris@10 | 184 	(Seq (a', b')), insnsb | 
| Chris@10 | 185     | _ -> failwith "anticipate_friends" | 
| Chris@10 | 186   in match recur (sched_to_ilist sched) sched with | 
| Chris@10 | 187   | (s, []) -> s (* assert that all insns have been used *) | 
| Chris@10 | 188   | _ -> failwith "anticipate_friends" | 
| Chris@10 | 189 | 
| Chris@10 | 190 let collect_buddy_stores buddy_list sched = | 
| Chris@10 | 191   let rec recur sched delayed_stores = match sched with | 
| Chris@10 | 192     | Done -> (sched, delayed_stores) | 
| Chris@10 | 193     | Instr (Assign (v, x)) -> | 
| Chris@10 | 194 	begin | 
| Chris@10 | 195 	  try | 
| Chris@10 | 196 	    let buddies = List.find (List.memq v) buddy_list in | 
| Chris@10 | 197 	    let tmp = Variable.make_temporary () in | 
| Chris@10 | 198 	    let i = Seq(Instr (Assign (tmp, x)), | 
| Chris@10 | 199 			Instr (Assign (v, Times (NaN MULTI_A, Load tmp)))) | 
| Chris@10 | 200 	    and delayed_stores = (v, Load tmp) :: delayed_stores in | 
| Chris@10 | 201 	      try | 
| Chris@10 | 202 		(Seq (i, | 
| Chris@10 | 203 		      Instr (Assign | 
| Chris@10 | 204 			       (List.hd buddies, | 
| Chris@10 | 205 				Times (NaN MULTI_B, | 
| Chris@10 | 206 				       Plus (List.map | 
| Chris@10 | 207 					       (fun buddy -> | 
| Chris@10 | 208 						  List.assq buddy | 
| Chris@10 | 209 						    delayed_stores) | 
| Chris@10 | 210 					       buddies))) ))) | 
| Chris@10 | 211 		  , delayed_stores | 
| Chris@10 | 212 	      with Not_found -> (i, delayed_stores) | 
| Chris@10 | 213 	  with Not_found -> (sched, delayed_stores) | 
| Chris@10 | 214 	end | 
| Chris@10 | 215     | Seq (a, b) -> | 
| Chris@10 | 216 	let (newa, delayed_stores) = recur a delayed_stores in | 
| Chris@10 | 217 	let (newb, delayed_stores) = recur b delayed_stores in | 
| Chris@10 | 218 	  (Seq (newa, newb), delayed_stores) | 
| Chris@10 | 219     | _ -> failwith "collect_buddy_stores" | 
| Chris@10 | 220   in let (sched, _) = recur sched [] in | 
| Chris@10 | 221     sched | 
| Chris@10 | 222 | 
| Chris@10 | 223 let schedule_for_pipeline sched = | 
| Chris@10 | 224   let update_readytimes t (Assign (v, _)) ready_times = | 
| Chris@10 | 225     (v, (t + !Magic.pipeline_latency)) :: ready_times | 
| Chris@10 | 226   and readyp t ready_times (Assign (_, x)) = | 
| Chris@10 | 227     List.for_all | 
| Chris@10 | 228       (fun var -> | 
| Chris@10 | 229 	 try | 
| Chris@10 | 230 	   (List.assq var ready_times) <= t | 
| Chris@10 | 231 	 with Not_found -> false) | 
| Chris@10 | 232       (List.filter Variable.is_temporary (Expr.find_vars x)) | 
| Chris@10 | 233   in | 
| Chris@10 | 234   let rec recur sched t ready_times delayed_instructions = | 
| Chris@10 | 235     let (ready, not_ready) = | 
| Chris@10 | 236       List.partition (readyp t ready_times) delayed_instructions | 
| Chris@10 | 237     in match ready with | 
| Chris@10 | 238       | a :: b -> | 
| Chris@10 | 239 	  let (sched, t, ready_times, delayed_instructions) = | 
| Chris@10 | 240 	    recur sched (t+1) (update_readytimes t a ready_times) | 
| Chris@10 | 241 	      (b @ not_ready) | 
| Chris@10 | 242 	  in | 
| Chris@10 | 243 	    (Seq (Instr a, sched)), t, ready_times, delayed_instructions | 
| Chris@10 | 244       | _ -> (match sched with | 
| Chris@10 | 245 		| Done -> (sched, t, ready_times, delayed_instructions) | 
| Chris@10 | 246 		| Instr a -> | 
| Chris@10 | 247 		    if (readyp t ready_times a) then | 
| Chris@10 | 248 		      (sched, (t+1), (update_readytimes t a ready_times), | 
| Chris@10 | 249 		       delayed_instructions) | 
| Chris@10 | 250 		    else | 
| Chris@10 | 251 		      (Done, t, ready_times, (a :: delayed_instructions)) | 
| Chris@10 | 252 		| Seq (a, b) -> | 
| Chris@10 | 253 		    let (a, t, ready_times, delayed_instructions) = | 
| Chris@10 | 254 		      recur a t ready_times delayed_instructions | 
| Chris@10 | 255 		    in | 
| Chris@10 | 256 		    let (b, t, ready_times, delayed_instructions) = | 
| Chris@10 | 257 		      recur b t ready_times delayed_instructions | 
| Chris@10 | 258 		    in (Seq (a, b)), t, ready_times, delayed_instructions | 
| Chris@10 | 259 	        | _ -> failwith "schedule_for_pipeline") | 
| Chris@10 | 260   in let rec recur_until_done sched t ready_times delayed_instructions = | 
| Chris@10 | 261       let (sched, t, ready_times, delayed_instructions) = | 
| Chris@10 | 262 	recur sched t ready_times delayed_instructions | 
| Chris@10 | 263       in match delayed_instructions with | 
| Chris@10 | 264 	| [] -> sched | 
| Chris@10 | 265 	| _ -> | 
| Chris@10 | 266 	    (Seq (sched, | 
| Chris@10 | 267 		  (recur_until_done Done (t+1) ready_times | 
| Chris@10 | 268 		     delayed_instructions))) | 
| Chris@10 | 269   in recur_until_done sched 0 [] [] | 
| Chris@10 | 270 | 
| Chris@10 | 271 let rec rewrite_declarations force_declarations | 
| Chris@10 | 272     (Annotate (_, _, declared, _, what)) = | 
| Chris@10 | 273   let m = !Magic.number_of_variables in | 
| Chris@10 | 274 | 
| Chris@10 | 275   let declare_it declared = | 
| Chris@10 | 276     if (force_declarations or List.length declared >= m) then | 
| Chris@10 | 277       ([], declared) | 
| Chris@10 | 278     else | 
| Chris@10 | 279       (declared, []) | 
| Chris@10 | 280 | 
| Chris@10 | 281   in match what with | 
| Chris@10 | 282     ADone -> Annotate ([], [], [], 0, what) | 
| Chris@10 | 283   | AInstr i -> | 
| Chris@10 | 284       let (u, d) = declare_it declared | 
| Chris@10 | 285       in Annotate ([], u, d, 0, what) | 
| Chris@10 | 286   | ASeq (a, b) -> | 
| Chris@10 | 287       let ma = rewrite_declarations false a | 
| Chris@10 | 288       and mb = rewrite_declarations false b | 
| Chris@10 | 289       in let Annotate (_, ua, _, _, _) = ma | 
| Chris@10 | 290       and Annotate (_, ub, _, _, _) = mb | 
| Chris@10 | 291       in let (u, d) = declare_it (declared @ ua @ ub) | 
| Chris@10 | 292       in Annotate ([], u, d, 0, ASeq (ma, mb)) | 
| Chris@10 | 293 | 
| Chris@10 | 294 let annotate list_of_buddy_stores schedule = | 
| Chris@10 | 295   let rec analyze live_at_end = function | 
| Chris@10 | 296       Done -> Annotate (live_at_end, [], [], 0, ADone) | 
| Chris@10 | 297     | Instr i -> (match i with | 
| Chris@10 | 298 	Assign (v, x) -> | 
| Chris@10 | 299 	  let vars = (find_vars x) in | 
| Chris@10 | 300 	  Annotate (Util.remove v (union live_at_end vars), [v], [], | 
| Chris@10 | 301 		    0, AInstr i)) | 
| Chris@10 | 302     | Seq (a, b) -> | 
| Chris@10 | 303 	let ab = analyze live_at_end b in | 
| Chris@10 | 304 	let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in | 
| Chris@10 | 305 	let aa = analyze live_at_begin_b a in | 
| Chris@10 | 306 	let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in | 
| Chris@10 | 307 	let defined = List.filter is_temporary (defined_a @ defined_b) in | 
| Chris@10 | 308 	let declarable = diff defined live_at_end in | 
| Chris@10 | 309 	let undeclarable = diff defined declarable | 
| Chris@10 | 310 	and maxdepth = max depth_a depth_b in | 
| Chris@10 | 311 	Annotate (live_at_begin_a, undeclarable, declarable, | 
| Chris@10 | 312 		  List.length declarable + maxdepth, | 
| Chris@10 | 313 		  ASeq (aa, ab)) | 
| Chris@10 | 314     | _ -> failwith "really_analyze" | 
| Chris@10 | 315 | 
| Chris@10 | 316   in | 
| Chris@10 | 317   let () = Util.info "begin annotate" in | 
| Chris@10 | 318   let x = linearize schedule in | 
| Chris@10 | 319 | 
| Chris@10 | 320   let x = | 
| Chris@10 | 321     if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then | 
| Chris@10 | 322       schedule_for_pipeline x | 
| Chris@10 | 323     else | 
| Chris@10 | 324       x | 
| Chris@10 | 325   in | 
| Chris@10 | 326 | 
| Chris@10 | 327   let x = | 
| Chris@10 | 328     if !Magic.reorder_insns then | 
| Chris@10 | 329       linearize(anticipate_friends use_same_vars x) | 
| Chris@10 | 330     else | 
| Chris@10 | 331       x | 
| Chris@10 | 332   in | 
| Chris@10 | 333 | 
| Chris@10 | 334   (* delay stores to the real and imaginary parts of the same number *) | 
| Chris@10 | 335   let x = | 
| Chris@10 | 336     if !Magic.reorder_stores then | 
| Chris@10 | 337       linearize(delay_friends store_to_same_class x) | 
| Chris@10 | 338     else | 
| Chris@10 | 339       x | 
| Chris@10 | 340   in | 
| Chris@10 | 341 | 
| Chris@10 | 342   (* move loads of the real and imaginary parts of the same number *) | 
| Chris@10 | 343   let x = | 
| Chris@10 | 344     if !Magic.reorder_loads then | 
| Chris@10 | 345       linearize(anticipate_friends loads_from_same_class x) | 
| Chris@10 | 346     else | 
| Chris@10 | 347       x | 
| Chris@10 | 348   in | 
| Chris@10 | 349 | 
| Chris@10 | 350   let x = collect_buddy_stores list_of_buddy_stores x in | 
| Chris@10 | 351   let x = analyze [] x in | 
| Chris@10 | 352   let res = rewrite_declarations true x in | 
| Chris@10 | 353   let () = Util.info "end annotate" in | 
| Chris@10 | 354   res | 
| Chris@10 | 355 | 
| Chris@10 | 356 let rec dump print (Annotate (_, _, _, _, code)) = | 
| Chris@10 | 357   dump_code print code | 
| Chris@10 | 358 and dump_code print = function | 
| Chris@10 | 359   | ADone -> () | 
| Chris@10 | 360   | AInstr x -> print ((assignment_to_string x) ^ "\n") | 
| Chris@10 | 361   | ASeq (a, b) -> dump print a; dump print b |