Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/genfft/annotate.ml @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 (* | |
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology | |
3 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
4 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify | |
7 * it under the terms of the GNU General Public License as published by | |
8 * the Free Software Foundation; either version 2 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 * GNU General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU General Public License | |
17 * along with this program; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 * | |
20 *) | |
21 | |
22 (* Here, we take a schedule (produced by schedule.ml) ordering a | |
23 sequence of instructions, and produce an annotated schedule. The | |
24 annotated schedule has the same ordering as the original schedule, | |
25 but is additionally partitioned into nested blocks of temporary | |
26 variables. The partitioning is computed via a heuristic algorithm. | |
27 | |
28 The blocking allows the C code that we generate to consist of | |
29 nested blocks that help communicate variable lifetimes to the | |
30 compiler. *) | |
31 | |
32 open Schedule | |
33 open Expr | |
34 open Variable | |
35 | |
36 type annotated_schedule = | |
37 Annotate of variable list * variable list * variable list * int * aschedule | |
38 and aschedule = | |
39 ADone | |
40 | AInstr of assignment | |
41 | ASeq of (annotated_schedule * annotated_schedule) | |
42 | |
43 let addelem a set = if not (List.memq a set) then a :: set else set | |
44 let union l = | |
45 let f x = addelem x (* let is source of polymorphism *) | |
46 in List.fold_right f l | |
47 | |
48 (* set difference a - b *) | |
49 let diff a b = List.filter (fun x -> not (List.memq x b)) a | |
50 | |
51 let rec minimize f = function | |
52 [] -> failwith "minimize" | |
53 | [n] -> n | |
54 | n :: rest -> | |
55 let x = minimize f rest in | |
56 if (f x) >= (f n) then n else x | |
57 | |
58 (* find all variables used inside a scheduling unit *) | |
59 let rec find_block_vars = function | |
60 Done -> [] | |
61 | (Instr (Assign (v, x))) -> v :: (find_vars x) | |
62 | Par a -> List.flatten (List.map find_block_vars a) | |
63 | Seq (a, b) -> (find_block_vars a) @ (find_block_vars b) | |
64 | |
65 let uniq l = | |
66 List.fold_right (fun a b -> if List.memq a b then b else a :: b) l [] | |
67 | |
68 let has_related x = List.exists (Variable.same_class x) | |
69 | |
70 let rec overlap a b = Util.count (fun y -> has_related y b) a | |
71 | |
72 (* reorder a list of schedules so as to maximize overlap of variables *) | |
73 let reorder l = | |
74 let rec loop = function | |
75 [] -> [] | |
76 | (a, va) :: b -> | |
77 let c = | |
78 List.map | |
79 (fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in | |
80 let c' = | |
81 Sort.list | |
82 (fun (_, (a, la)) (_, (b, lb)) -> | |
83 la < lb or a > b) | |
84 c in | |
85 let b' = List.map (fun (a, _) -> a) c' in | |
86 a :: (loop b') in | |
87 let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in | |
88 (* start with smallest block --- does this matter ? *) | |
89 match l' with | |
90 [] -> [] | |
91 | _ -> | |
92 let m = minimize (fun (_, x) -> (List.length x)) l' in | |
93 let l'' = Util.remove m l' in | |
94 loop (m :: l'') | |
95 | |
96 (* remove Par blocks *) | |
97 let rec linearize = function | |
98 | Seq (a, Done) -> linearize a | |
99 | Seq (Done, a) -> linearize a | |
100 | Seq (a, b) -> Seq (linearize a, linearize b) | |
101 | |
102 (* try to balance nested Par blocks *) | |
103 | Par [a] -> linearize a | |
104 | Par l -> | |
105 let n2 = (List.length l) / 2 in | |
106 let rec loop n a b = | |
107 if n = 0 then | |
108 (List.rev b, a) | |
109 else | |
110 match a with | |
111 [] -> failwith "loop" | |
112 | x :: y -> loop (n - 1) y (x :: b) | |
113 in let (a, b) = loop n2 (reorder l) [] | |
114 in linearize (Seq (Par a, Par b)) | |
115 | |
116 | x -> x | |
117 | |
118 let subset a b = | |
119 List.for_all (fun x -> List.exists (fun y -> x == y) b) a | |
120 | |
121 let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) = | |
122 is_temporary av && | |
123 is_temporary bv && | |
124 (let va = Expr.find_vars ax and vb = Expr.find_vars bx in | |
125 subset va vb && subset vb va) | |
126 | |
127 let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) = | |
128 is_locative av && | |
129 is_locative bv && | |
130 Variable.same_class av bv | |
131 | |
132 let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) = | |
133 match (ax, bx) with | |
134 | (Load a), (Load b) when | |
135 Variable.is_locative a && Variable.is_locative b | |
136 -> Variable.same_class a b | |
137 | _ -> false | |
138 | |
139 (* extract instructions from schedule *) | |
140 let rec sched_to_ilist = function | |
141 | Done -> [] | |
142 | Instr a -> [a] | |
143 | Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b) | |
144 | _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *) | |
145 | |
146 let rec find_friends friendp insn friends foes = function | |
147 | [] -> (friends, foes) | |
148 | a :: b -> | |
149 if (a == insn) || (friendp a insn) then | |
150 find_friends friendp insn (a :: friends) foes b | |
151 else | |
152 find_friends friendp insn friends (a :: foes) b | |
153 | |
154 (* schedule all instructions in the equivalence class determined | |
155 by friendp at the point where the last one | |
156 is executed *) | |
157 let rec delay_friends friendp sched = | |
158 let rec recur insns = function | |
159 | Done -> (Done, insns) | |
160 | Instr a -> | |
161 let (friends, foes) = find_friends friendp a [] [] insns in | |
162 (Schedule.sequentially friends), foes | |
163 | Seq (a, b) -> | |
164 let (b', insnsb) = recur insns b in | |
165 let (a', insnsa) = recur insnsb a in | |
166 (Seq (a', b')), insnsa | |
167 | _ -> failwith "delay_friends" | |
168 in match recur (sched_to_ilist sched) sched with | |
169 | (s, []) -> s (* assert that all insns have been used *) | |
170 | _ -> failwith "delay_friends" | |
171 | |
172 (* schedule all instructions in the equivalence class determined | |
173 by friendp at the point where the first one | |
174 is executed *) | |
175 let rec anticipate_friends friendp sched = | |
176 let rec recur insns = function | |
177 | Done -> (Done, insns) | |
178 | Instr a -> | |
179 let (friends, foes) = find_friends friendp a [] [] insns in | |
180 (Schedule.sequentially friends), foes | |
181 | Seq (a, b) -> | |
182 let (a', insnsa) = recur insns a in | |
183 let (b', insnsb) = recur insnsa b in | |
184 (Seq (a', b')), insnsb | |
185 | _ -> failwith "anticipate_friends" | |
186 in match recur (sched_to_ilist sched) sched with | |
187 | (s, []) -> s (* assert that all insns have been used *) | |
188 | _ -> failwith "anticipate_friends" | |
189 | |
190 let collect_buddy_stores buddy_list sched = | |
191 let rec recur sched delayed_stores = match sched with | |
192 | Done -> (sched, delayed_stores) | |
193 | Instr (Assign (v, x)) -> | |
194 begin | |
195 try | |
196 let buddies = List.find (List.memq v) buddy_list in | |
197 let tmp = Variable.make_temporary () in | |
198 let i = Seq(Instr (Assign (tmp, x)), | |
199 Instr (Assign (v, Times (NaN MULTI_A, Load tmp)))) | |
200 and delayed_stores = (v, Load tmp) :: delayed_stores in | |
201 try | |
202 (Seq (i, | |
203 Instr (Assign | |
204 (List.hd buddies, | |
205 Times (NaN MULTI_B, | |
206 Plus (List.map | |
207 (fun buddy -> | |
208 List.assq buddy | |
209 delayed_stores) | |
210 buddies))) ))) | |
211 , delayed_stores | |
212 with Not_found -> (i, delayed_stores) | |
213 with Not_found -> (sched, delayed_stores) | |
214 end | |
215 | Seq (a, b) -> | |
216 let (newa, delayed_stores) = recur a delayed_stores in | |
217 let (newb, delayed_stores) = recur b delayed_stores in | |
218 (Seq (newa, newb), delayed_stores) | |
219 | _ -> failwith "collect_buddy_stores" | |
220 in let (sched, _) = recur sched [] in | |
221 sched | |
222 | |
223 let schedule_for_pipeline sched = | |
224 let update_readytimes t (Assign (v, _)) ready_times = | |
225 (v, (t + !Magic.pipeline_latency)) :: ready_times | |
226 and readyp t ready_times (Assign (_, x)) = | |
227 List.for_all | |
228 (fun var -> | |
229 try | |
230 (List.assq var ready_times) <= t | |
231 with Not_found -> false) | |
232 (List.filter Variable.is_temporary (Expr.find_vars x)) | |
233 in | |
234 let rec recur sched t ready_times delayed_instructions = | |
235 let (ready, not_ready) = | |
236 List.partition (readyp t ready_times) delayed_instructions | |
237 in match ready with | |
238 | a :: b -> | |
239 let (sched, t, ready_times, delayed_instructions) = | |
240 recur sched (t+1) (update_readytimes t a ready_times) | |
241 (b @ not_ready) | |
242 in | |
243 (Seq (Instr a, sched)), t, ready_times, delayed_instructions | |
244 | _ -> (match sched with | |
245 | Done -> (sched, t, ready_times, delayed_instructions) | |
246 | Instr a -> | |
247 if (readyp t ready_times a) then | |
248 (sched, (t+1), (update_readytimes t a ready_times), | |
249 delayed_instructions) | |
250 else | |
251 (Done, t, ready_times, (a :: delayed_instructions)) | |
252 | Seq (a, b) -> | |
253 let (a, t, ready_times, delayed_instructions) = | |
254 recur a t ready_times delayed_instructions | |
255 in | |
256 let (b, t, ready_times, delayed_instructions) = | |
257 recur b t ready_times delayed_instructions | |
258 in (Seq (a, b)), t, ready_times, delayed_instructions | |
259 | _ -> failwith "schedule_for_pipeline") | |
260 in let rec recur_until_done sched t ready_times delayed_instructions = | |
261 let (sched, t, ready_times, delayed_instructions) = | |
262 recur sched t ready_times delayed_instructions | |
263 in match delayed_instructions with | |
264 | [] -> sched | |
265 | _ -> | |
266 (Seq (sched, | |
267 (recur_until_done Done (t+1) ready_times | |
268 delayed_instructions))) | |
269 in recur_until_done sched 0 [] [] | |
270 | |
271 let rec rewrite_declarations force_declarations | |
272 (Annotate (_, _, declared, _, what)) = | |
273 let m = !Magic.number_of_variables in | |
274 | |
275 let declare_it declared = | |
276 if (force_declarations or List.length declared >= m) then | |
277 ([], declared) | |
278 else | |
279 (declared, []) | |
280 | |
281 in match what with | |
282 ADone -> Annotate ([], [], [], 0, what) | |
283 | AInstr i -> | |
284 let (u, d) = declare_it declared | |
285 in Annotate ([], u, d, 0, what) | |
286 | ASeq (a, b) -> | |
287 let ma = rewrite_declarations false a | |
288 and mb = rewrite_declarations false b | |
289 in let Annotate (_, ua, _, _, _) = ma | |
290 and Annotate (_, ub, _, _, _) = mb | |
291 in let (u, d) = declare_it (declared @ ua @ ub) | |
292 in Annotate ([], u, d, 0, ASeq (ma, mb)) | |
293 | |
294 let annotate list_of_buddy_stores schedule = | |
295 let rec analyze live_at_end = function | |
296 Done -> Annotate (live_at_end, [], [], 0, ADone) | |
297 | Instr i -> (match i with | |
298 Assign (v, x) -> | |
299 let vars = (find_vars x) in | |
300 Annotate (Util.remove v (union live_at_end vars), [v], [], | |
301 0, AInstr i)) | |
302 | Seq (a, b) -> | |
303 let ab = analyze live_at_end b in | |
304 let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in | |
305 let aa = analyze live_at_begin_b a in | |
306 let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in | |
307 let defined = List.filter is_temporary (defined_a @ defined_b) in | |
308 let declarable = diff defined live_at_end in | |
309 let undeclarable = diff defined declarable | |
310 and maxdepth = max depth_a depth_b in | |
311 Annotate (live_at_begin_a, undeclarable, declarable, | |
312 List.length declarable + maxdepth, | |
313 ASeq (aa, ab)) | |
314 | _ -> failwith "really_analyze" | |
315 | |
316 in | |
317 let () = Util.info "begin annotate" in | |
318 let x = linearize schedule in | |
319 | |
320 let x = | |
321 if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then | |
322 schedule_for_pipeline x | |
323 else | |
324 x | |
325 in | |
326 | |
327 let x = | |
328 if !Magic.reorder_insns then | |
329 linearize(anticipate_friends use_same_vars x) | |
330 else | |
331 x | |
332 in | |
333 | |
334 (* delay stores to the real and imaginary parts of the same number *) | |
335 let x = | |
336 if !Magic.reorder_stores then | |
337 linearize(delay_friends store_to_same_class x) | |
338 else | |
339 x | |
340 in | |
341 | |
342 (* move loads of the real and imaginary parts of the same number *) | |
343 let x = | |
344 if !Magic.reorder_loads then | |
345 linearize(anticipate_friends loads_from_same_class x) | |
346 else | |
347 x | |
348 in | |
349 | |
350 let x = collect_buddy_stores list_of_buddy_stores x in | |
351 let x = analyze [] x in | |
352 let res = rewrite_declarations true x in | |
353 let () = Util.info "end annotate" in | |
354 res | |
355 | |
356 let rec dump print (Annotate (_, _, _, _, code)) = | |
357 dump_code print code | |
358 and dump_code print = function | |
359 | ADone -> () | |
360 | AInstr x -> print ((assignment_to_string x) ^ "\n") | |
361 | ASeq (a, b) -> dump print a; dump print b |