comparison src/fftw-3.3.3/genfft/annotate.ml @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
comparison
equal deleted inserted replaced
9:c0fb53affa76 10:37bf6b4a2645
1 (*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
3 * Copyright (c) 2003, 2007-11 Matteo Frigo
4 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 *)
21
22 (* Here, we take a schedule (produced by schedule.ml) ordering a
23 sequence of instructions, and produce an annotated schedule. The
24 annotated schedule has the same ordering as the original schedule,
25 but is additionally partitioned into nested blocks of temporary
26 variables. The partitioning is computed via a heuristic algorithm.
27
28 The blocking allows the C code that we generate to consist of
29 nested blocks that help communicate variable lifetimes to the
30 compiler. *)
31
32 open Schedule
33 open Expr
34 open Variable
35
36 type annotated_schedule =
37 Annotate of variable list * variable list * variable list * int * aschedule
38 and aschedule =
39 ADone
40 | AInstr of assignment
41 | ASeq of (annotated_schedule * annotated_schedule)
42
43 let addelem a set = if not (List.memq a set) then a :: set else set
44 let union l =
45 let f x = addelem x (* let is source of polymorphism *)
46 in List.fold_right f l
47
48 (* set difference a - b *)
49 let diff a b = List.filter (fun x -> not (List.memq x b)) a
50
51 let rec minimize f = function
52 [] -> failwith "minimize"
53 | [n] -> n
54 | n :: rest ->
55 let x = minimize f rest in
56 if (f x) >= (f n) then n else x
57
58 (* find all variables used inside a scheduling unit *)
59 let rec find_block_vars = function
60 Done -> []
61 | (Instr (Assign (v, x))) -> v :: (find_vars x)
62 | Par a -> List.flatten (List.map find_block_vars a)
63 | Seq (a, b) -> (find_block_vars a) @ (find_block_vars b)
64
65 let uniq l =
66 List.fold_right (fun a b -> if List.memq a b then b else a :: b) l []
67
68 let has_related x = List.exists (Variable.same_class x)
69
70 let rec overlap a b = Util.count (fun y -> has_related y b) a
71
72 (* reorder a list of schedules so as to maximize overlap of variables *)
73 let reorder l =
74 let rec loop = function
75 [] -> []
76 | (a, va) :: b ->
77 let c =
78 List.map
79 (fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in
80 let c' =
81 Sort.list
82 (fun (_, (a, la)) (_, (b, lb)) ->
83 la < lb or a > b)
84 c in
85 let b' = List.map (fun (a, _) -> a) c' in
86 a :: (loop b') in
87 let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in
88 (* start with smallest block --- does this matter ? *)
89 match l' with
90 [] -> []
91 | _ ->
92 let m = minimize (fun (_, x) -> (List.length x)) l' in
93 let l'' = Util.remove m l' in
94 loop (m :: l'')
95
96 (* remove Par blocks *)
97 let rec linearize = function
98 | Seq (a, Done) -> linearize a
99 | Seq (Done, a) -> linearize a
100 | Seq (a, b) -> Seq (linearize a, linearize b)
101
102 (* try to balance nested Par blocks *)
103 | Par [a] -> linearize a
104 | Par l ->
105 let n2 = (List.length l) / 2 in
106 let rec loop n a b =
107 if n = 0 then
108 (List.rev b, a)
109 else
110 match a with
111 [] -> failwith "loop"
112 | x :: y -> loop (n - 1) y (x :: b)
113 in let (a, b) = loop n2 (reorder l) []
114 in linearize (Seq (Par a, Par b))
115
116 | x -> x
117
118 let subset a b =
119 List.for_all (fun x -> List.exists (fun y -> x == y) b) a
120
121 let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) =
122 is_temporary av &&
123 is_temporary bv &&
124 (let va = Expr.find_vars ax and vb = Expr.find_vars bx in
125 subset va vb && subset vb va)
126
127 let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) =
128 is_locative av &&
129 is_locative bv &&
130 Variable.same_class av bv
131
132 let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) =
133 match (ax, bx) with
134 | (Load a), (Load b) when
135 Variable.is_locative a && Variable.is_locative b
136 -> Variable.same_class a b
137 | _ -> false
138
139 (* extract instructions from schedule *)
140 let rec sched_to_ilist = function
141 | Done -> []
142 | Instr a -> [a]
143 | Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b)
144 | _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *)
145
146 let rec find_friends friendp insn friends foes = function
147 | [] -> (friends, foes)
148 | a :: b ->
149 if (a == insn) || (friendp a insn) then
150 find_friends friendp insn (a :: friends) foes b
151 else
152 find_friends friendp insn friends (a :: foes) b
153
154 (* schedule all instructions in the equivalence class determined
155 by friendp at the point where the last one
156 is executed *)
157 let rec delay_friends friendp sched =
158 let rec recur insns = function
159 | Done -> (Done, insns)
160 | Instr a ->
161 let (friends, foes) = find_friends friendp a [] [] insns in
162 (Schedule.sequentially friends), foes
163 | Seq (a, b) ->
164 let (b', insnsb) = recur insns b in
165 let (a', insnsa) = recur insnsb a in
166 (Seq (a', b')), insnsa
167 | _ -> failwith "delay_friends"
168 in match recur (sched_to_ilist sched) sched with
169 | (s, []) -> s (* assert that all insns have been used *)
170 | _ -> failwith "delay_friends"
171
172 (* schedule all instructions in the equivalence class determined
173 by friendp at the point where the first one
174 is executed *)
175 let rec anticipate_friends friendp sched =
176 let rec recur insns = function
177 | Done -> (Done, insns)
178 | Instr a ->
179 let (friends, foes) = find_friends friendp a [] [] insns in
180 (Schedule.sequentially friends), foes
181 | Seq (a, b) ->
182 let (a', insnsa) = recur insns a in
183 let (b', insnsb) = recur insnsa b in
184 (Seq (a', b')), insnsb
185 | _ -> failwith "anticipate_friends"
186 in match recur (sched_to_ilist sched) sched with
187 | (s, []) -> s (* assert that all insns have been used *)
188 | _ -> failwith "anticipate_friends"
189
190 let collect_buddy_stores buddy_list sched =
191 let rec recur sched delayed_stores = match sched with
192 | Done -> (sched, delayed_stores)
193 | Instr (Assign (v, x)) ->
194 begin
195 try
196 let buddies = List.find (List.memq v) buddy_list in
197 let tmp = Variable.make_temporary () in
198 let i = Seq(Instr (Assign (tmp, x)),
199 Instr (Assign (v, Times (NaN MULTI_A, Load tmp))))
200 and delayed_stores = (v, Load tmp) :: delayed_stores in
201 try
202 (Seq (i,
203 Instr (Assign
204 (List.hd buddies,
205 Times (NaN MULTI_B,
206 Plus (List.map
207 (fun buddy ->
208 List.assq buddy
209 delayed_stores)
210 buddies))) )))
211 , delayed_stores
212 with Not_found -> (i, delayed_stores)
213 with Not_found -> (sched, delayed_stores)
214 end
215 | Seq (a, b) ->
216 let (newa, delayed_stores) = recur a delayed_stores in
217 let (newb, delayed_stores) = recur b delayed_stores in
218 (Seq (newa, newb), delayed_stores)
219 | _ -> failwith "collect_buddy_stores"
220 in let (sched, _) = recur sched [] in
221 sched
222
223 let schedule_for_pipeline sched =
224 let update_readytimes t (Assign (v, _)) ready_times =
225 (v, (t + !Magic.pipeline_latency)) :: ready_times
226 and readyp t ready_times (Assign (_, x)) =
227 List.for_all
228 (fun var ->
229 try
230 (List.assq var ready_times) <= t
231 with Not_found -> false)
232 (List.filter Variable.is_temporary (Expr.find_vars x))
233 in
234 let rec recur sched t ready_times delayed_instructions =
235 let (ready, not_ready) =
236 List.partition (readyp t ready_times) delayed_instructions
237 in match ready with
238 | a :: b ->
239 let (sched, t, ready_times, delayed_instructions) =
240 recur sched (t+1) (update_readytimes t a ready_times)
241 (b @ not_ready)
242 in
243 (Seq (Instr a, sched)), t, ready_times, delayed_instructions
244 | _ -> (match sched with
245 | Done -> (sched, t, ready_times, delayed_instructions)
246 | Instr a ->
247 if (readyp t ready_times a) then
248 (sched, (t+1), (update_readytimes t a ready_times),
249 delayed_instructions)
250 else
251 (Done, t, ready_times, (a :: delayed_instructions))
252 | Seq (a, b) ->
253 let (a, t, ready_times, delayed_instructions) =
254 recur a t ready_times delayed_instructions
255 in
256 let (b, t, ready_times, delayed_instructions) =
257 recur b t ready_times delayed_instructions
258 in (Seq (a, b)), t, ready_times, delayed_instructions
259 | _ -> failwith "schedule_for_pipeline")
260 in let rec recur_until_done sched t ready_times delayed_instructions =
261 let (sched, t, ready_times, delayed_instructions) =
262 recur sched t ready_times delayed_instructions
263 in match delayed_instructions with
264 | [] -> sched
265 | _ ->
266 (Seq (sched,
267 (recur_until_done Done (t+1) ready_times
268 delayed_instructions)))
269 in recur_until_done sched 0 [] []
270
271 let rec rewrite_declarations force_declarations
272 (Annotate (_, _, declared, _, what)) =
273 let m = !Magic.number_of_variables in
274
275 let declare_it declared =
276 if (force_declarations or List.length declared >= m) then
277 ([], declared)
278 else
279 (declared, [])
280
281 in match what with
282 ADone -> Annotate ([], [], [], 0, what)
283 | AInstr i ->
284 let (u, d) = declare_it declared
285 in Annotate ([], u, d, 0, what)
286 | ASeq (a, b) ->
287 let ma = rewrite_declarations false a
288 and mb = rewrite_declarations false b
289 in let Annotate (_, ua, _, _, _) = ma
290 and Annotate (_, ub, _, _, _) = mb
291 in let (u, d) = declare_it (declared @ ua @ ub)
292 in Annotate ([], u, d, 0, ASeq (ma, mb))
293
294 let annotate list_of_buddy_stores schedule =
295 let rec analyze live_at_end = function
296 Done -> Annotate (live_at_end, [], [], 0, ADone)
297 | Instr i -> (match i with
298 Assign (v, x) ->
299 let vars = (find_vars x) in
300 Annotate (Util.remove v (union live_at_end vars), [v], [],
301 0, AInstr i))
302 | Seq (a, b) ->
303 let ab = analyze live_at_end b in
304 let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in
305 let aa = analyze live_at_begin_b a in
306 let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in
307 let defined = List.filter is_temporary (defined_a @ defined_b) in
308 let declarable = diff defined live_at_end in
309 let undeclarable = diff defined declarable
310 and maxdepth = max depth_a depth_b in
311 Annotate (live_at_begin_a, undeclarable, declarable,
312 List.length declarable + maxdepth,
313 ASeq (aa, ab))
314 | _ -> failwith "really_analyze"
315
316 in
317 let () = Util.info "begin annotate" in
318 let x = linearize schedule in
319
320 let x =
321 if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then
322 schedule_for_pipeline x
323 else
324 x
325 in
326
327 let x =
328 if !Magic.reorder_insns then
329 linearize(anticipate_friends use_same_vars x)
330 else
331 x
332 in
333
334 (* delay stores to the real and imaginary parts of the same number *)
335 let x =
336 if !Magic.reorder_stores then
337 linearize(delay_friends store_to_same_class x)
338 else
339 x
340 in
341
342 (* move loads of the real and imaginary parts of the same number *)
343 let x =
344 if !Magic.reorder_loads then
345 linearize(anticipate_friends loads_from_same_class x)
346 else
347 x
348 in
349
350 let x = collect_buddy_stores list_of_buddy_stores x in
351 let x = analyze [] x in
352 let res = rewrite_declarations true x in
353 let () = Util.info "end annotate" in
354 res
355
356 let rec dump print (Annotate (_, _, _, _, code)) =
357 dump_code print code
358 and dump_code print = function
359 | ADone -> ()
360 | AInstr x -> print ((assignment_to_string x) ^ "\n")
361 | ASeq (a, b) -> dump print a; dump print b