cannam@127
|
1 (*
|
cannam@127
|
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
cannam@127
|
3 * Copyright (c) 2003, 2007-14 Matteo Frigo
|
cannam@127
|
4 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
cannam@127
|
5 *
|
cannam@127
|
6 * This program is free software; you can redistribute it and/or modify
|
cannam@127
|
7 * it under the terms of the GNU General Public License as published by
|
cannam@127
|
8 * the Free Software Foundation; either version 2 of the License, or
|
cannam@127
|
9 * (at your option) any later version.
|
cannam@127
|
10 *
|
cannam@127
|
11 * This program is distributed in the hope that it will be useful,
|
cannam@127
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@127
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@127
|
14 * GNU General Public License for more details.
|
cannam@127
|
15 *
|
cannam@127
|
16 * You should have received a copy of the GNU General Public License
|
cannam@127
|
17 * along with this program; if not, write to the Free Software
|
cannam@127
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@127
|
19 *
|
cannam@127
|
20 *)
|
cannam@127
|
21
|
cannam@127
|
22 (* generation of trigonometric transforms *)
|
cannam@127
|
23
|
cannam@127
|
24 open Util
|
cannam@127
|
25 open Genutil
|
cannam@127
|
26 open C
|
cannam@127
|
27
|
cannam@127
|
28
|
cannam@127
|
(* Usage banner handed to the command-line parser; it embeds the name the
   program was invoked under. *)
let usage = Printf.sprintf "Usage: %s -n <number>" Sys.argv.(0)
|
cannam@127
|
30
|
cannam@127
|
(* Command-line controlled settings.  Every stride starts out as
   [Stride_variable] and is overwritten by the matching "-with-*stride"
   option in [speclist]. *)

(* input stride *)
let uistride = ref Stride_variable

(* output stride *)
let uostride = ref Stride_variable

(* input vector stride *)
let uivstride = ref Stride_variable

(* output vector stride *)
let uovstride = ref Stride_variable

(* integer set by "-normalization"; defaults to 1 *)
let normalization = ref 1
|
cannam@127
|
36
|
cannam@127
|
(* Kind of codelet to generate.  [NONE] is the initial value, meaning that
   no transform kind has been selected on the command line. *)
type mode =
  | MDCT              (* selected by -mdct *)
  | MDCT_MP3          (* selected by -mdct-mp3 *)
  | MDCT_VORBIS       (* no command-line option selects this one *)
  | MDCT_WINDOW       (* selected by -mdct-window *)
  | MDCT_WINDOW_SYM   (* selected by -mdct-window-sym *)
  | IMDCT             (* selected by -imdct *)
  | IMDCT_MP3         (* selected by -imdct-mp3 *)
  | IMDCT_VORBIS      (* no command-line option selects this one *)
  | IMDCT_WINDOW      (* selected by -imdct-window *)
  | IMDCT_WINDOW_SYM  (* selected by -imdct-window-sym *)
  | NONE

(* Currently selected transform kind; updated by the options in [speclist]. *)
let mode = ref NONE
|
cannam@127
|
51
|
cannam@127
|
(* Command-line options specific to this generator, in the
   (key, spec, doc) format of the standard [Arg] module.

   "-normalization" is declared with [Arg.Int] so that a malformed value is
   reported by [Arg] as an ordinary bad-argument error, instead of escaping
   as an uncaught [Failure] from [int_of_string]. *)
let speclist = [
  "-with-istride",
  Arg.String (fun x -> uistride := arg_to_stride x),
  " specialize for given input stride";

  "-with-ostride",
  Arg.String (fun x -> uostride := arg_to_stride x),
  " specialize for given output stride";

  "-with-ivstride",
  Arg.String (fun x -> uivstride := arg_to_stride x),
  " specialize for given input vector stride";

  "-with-ovstride",
  Arg.String (fun x -> uovstride := arg_to_stride x),
  " specialize for given output vector stride";

  "-normalization",
  Arg.Int (fun x -> normalization := x),
  " normalization integer to divide by";

  "-mdct",
  Arg.Unit (fun () -> mode := MDCT),
  " generate an MDCT codelet";

  "-mdct-mp3",
  Arg.Unit (fun () -> mode := MDCT_MP3),
  " generate an MDCT codelet with MP3 windowing";

  "-mdct-window",
  Arg.Unit (fun () -> mode := MDCT_WINDOW),
  " generate an MDCT codelet with window array";

  "-mdct-window-sym",
  Arg.Unit (fun () -> mode := MDCT_WINDOW_SYM),
  " generate an MDCT codelet with symmetric window array";

  "-imdct",
  Arg.Unit (fun () -> mode := IMDCT),
  " generate an IMDCT codelet";

  "-imdct-mp3",
  Arg.Unit (fun () -> mode := IMDCT_MP3),
  " generate an IMDCT codelet with MP3 windowing";

  "-imdct-window",
  Arg.Unit (fun () -> mode := IMDCT_WINDOW),
  " generate an IMDCT codelet with window array";

  "-imdct-window-sym",
  Arg.Unit (fun () -> mode := IMDCT_WINDOW_SYM),
  " generate an IMDCT codelet with symmetric window array";
]
|
cannam@127
|
105
|
cannam@127
|
(* Trivial window: the weight is [Complex.one] regardless of the transform
   size and of the sample index. *)
let unity_window _n _i = Complex.one
|
cannam@127
|
107
|
cannam@127
|
(* MP3 window(k) = sin(pi/(2n) * (k + 1/2)).
   Obtained as the imaginary part of [Complex.exp (8 * n) (2*k + 1)]; this
   matches the formula provided [Complex.exp m j] denotes exp(2*pi*i*j/m)
   -- NOTE(review): confirm against the Complex module. *)
let mp3_window n k =
  let root = Complex.exp (8 * n) (2*k + 1) in
  Complex.imag root
|
cannam@127
|
111
|
cannam@127
|
112 (* Vorbis window(k) = sin(pi/2 * (mp3_window(k))^2)
|
cannam@127
|
113 ... this is transcendental, though, so we can't do it with our
|
cannam@127
|
114 current Complex.exp function *)
|
cannam@127
|
115
|
cannam@127
|
(* [window_array n w] builds, through [array n], an accessor for the window
   coefficients held in the C array named [w].  For index [i] it subscripts
   [w] with unit stride, wraps the reference in a constant variable tagged
   with a fresh [Unique] class, and loads it with [load_r], passing the same
   variable as both components of the pair. *)
let window_array n w =
  let unit_stride = C.SInteger 1 in
  let coefficient i =
    let klass = Unique.make () in
    let cell = C.array_subscript w unit_stride i in
    let var = Variable.make_constant klass cell in
    load_r (var, var)
  in
  array n coefficient
|
cannam@127
|
123
|
cannam@127
|
(* Window lookup for a full-length window: the weight of sample [i] is
   simply [w i]; the transform size is not used. *)
let load_window w _n i = w i
|
cannam@127
|
(* Window lookup for a symmetric window: indices below [n] are read
   directly, and an index [i >= n] is mirrored onto [2*n - 1 - i]. *)
let load_window_sym w n i =
  let mirrored = if i < n then i else 2*n - 1 - i in
  w mirrored
|
cannam@127
|
126
|
cannam@127
|
127 (* fixme: use same locations for input and output so that it works in-place? *)
|
cannam@127
|
128
|
cannam@127
|
(* Note: only correct for even n! *)
(* [load_array_mdct window n rarr iarr locations] loads [2*n] input
   samples, multiplies sample [i] by [window n i], and folds the windowed
   samples into [n] values, each scaled by [Complex.half]:
     - for i <  n/2:  -(x(i + n + n/2) + x(n + n/2 - 1 - i))
     - for i >= n/2:    x(i - n/2)     - x(n + n/2 - 1 - i)
   where x is the windowed input.
   NOTE(review): [@@] is used here to combine two functions, so it is
   presumably a composition operator brought in by [open Util] rather than
   the standard application operator -- confirm. *)
let load_array_mdct window n rarr iarr locations =
  let len = 2 * n in
  let hn = n / 2 in
  let samples =
    load_array_c len (locative_array_c len rarr iarr locations "BUG") in
  let windowed i = Complex.times (window n i) (samples i) in
  let folded i =
    if i < hn then
      Complex.uminus
        (Complex.plus [windowed (i + n + hn); windowed (n + hn - 1 - i)])
    else
      Complex.plus
        [windowed (i - hn); Complex.uminus (windowed (n + hn - 1 - i))]
  in
  array n ((Complex.times Complex.half) @@ folded)
|
cannam@127
|
144
|
cannam@127
|
(* Stores the [n] values of [arr] through [store_array_r].  The [window]
   argument is accepted only so that the signature matches
   [store_array_imdct]; it is not used. *)
let store_array_mdct window n rarr iarr locations arr =
  let destination = locative_array_c n rarr iarr locations "BUG" in
  store_array_r n destination arr
|
cannam@127
|
147
|
cannam@127
|
(* Loads the [n] inputs through [load_array_c], with no folding and no
   windowing.  The [window] argument is accepted only so that the signature
   matches [load_array_mdct]; it is not used. *)
let load_array_imdct window n rarr iarr locations =
  let source = locative_array_c n rarr iarr locations "BUG" in
  load_array_c n source
|
cannam@127
|
150
|
cannam@127
|
(* [store_array_imdct window n rarr iarr locations arr] expands the [n]
   values of [arr] into [2*n] outputs, multiplies output [i] by
   [window n i], and stores the result with [store_array_r]:
     - for i <  n/2:              arr (i + n/2)
     - for n/2 <= i < 3*(n/2):   -arr (3*(n/2) - 1 - i)
     - otherwise:                -arr (i - 3*(n/2))
   All halves use integer division. *)
let store_array_imdct window n rarr iarr locations arr =
  let hn = n / 2 in
  let three_hn = 3 * hn in
  let unfolded i =
    if i < hn then arr (i + hn)
    else if i < three_hn then Complex.uminus (arr (three_hn - 1 - i))
    else Complex.uminus (arr (i - three_hn))
  in
  let weighted i = Complex.times (window n i) (unfolded i) in
  let len = 2 * n in
  store_array_r len (locative_array_c len rarr iarr locations "BUG") weighted
|
cannam@127
|
165
|
cannam@127
|
(* True exactly for the modes whose generated function takes an extra
   window-array parameter. *)
let window_param = function
  | MDCT_WINDOW | MDCT_WINDOW_SYM | IMDCT_WINDOW | IMDCT_WINDOW_SYM -> true
  | MDCT | MDCT_MP3 | MDCT_VORBIS
  | IMDCT | IMDCT_MP3 | IMDCT_VORBIS
  | NONE -> false
|
cannam@127
|
172
|
cannam@127
|
(* [generate n mode] returns the text of the generated function (the
   unparsed syntax tree followed by a newline) for a codelet of size [n].

   Steps:
     1. pick the transform together with the input loader and output
        storer that belong to [mode];
     2. load the input, apply the transform, and scale the result by
        [Complex.inverse_int !normalization];
     3. store the output, run [standard_optimizer] on the result, and wrap
        it in a "void" function whose parameter list depends on which
        strides are fixed and on whether [mode] takes a window array.

   Fails with [Failure "must specify transform kind"] when [mode] has no
   associated transform. *)
let generate n mode =
  let iarray = "I"
  and oarray = "O"
  and istride = "istride"
  and ostride = "ostride"
  and window = "W"
  and name = !Magic.codelet_name in

  let vistride = either_stride (!uistride) (C.SVar istride)
  and vostride = either_stride (!uostride) (C.SVar ostride)
  in

  (* Each vector-stride name is derived from its own option.  These two
     used to be crossed: the parameter emitted under the input-vector-stride
     guard took its name from the output-vector-stride setting, and vice
     versa. *)
  let sivs = stride_to_string "ivs" !uivstride in
  let sovs = stride_to_string "ovs" !uovstride in

  (* Every supported mode uses Trig.dctIV; the modes differ only in where
     the window is applied (while loading for MDCT, while storing for
     IMDCT) and in which window is used. *)
  let (transform, load_input, store_output) = match mode with
  | MDCT -> Trig.dctIV, load_array_mdct unity_window,
      store_array_mdct unity_window
  | MDCT_MP3 -> Trig.dctIV, load_array_mdct mp3_window,
      store_array_mdct unity_window
  | MDCT_WINDOW -> Trig.dctIV, load_array_mdct
        (load_window (window_array (2 * n) window)),
      store_array_mdct unity_window
  | MDCT_WINDOW_SYM -> Trig.dctIV, load_array_mdct
        (load_window_sym (window_array n window)),
      store_array_mdct unity_window
  | IMDCT -> Trig.dctIV, load_array_imdct unity_window,
      store_array_imdct unity_window
  | IMDCT_MP3 -> Trig.dctIV, load_array_imdct unity_window,
      store_array_imdct mp3_window
  | IMDCT_WINDOW -> Trig.dctIV, load_array_imdct unity_window,
      store_array_imdct (load_window (window_array (2 * n) window))
  | IMDCT_WINDOW_SYM -> Trig.dctIV, load_array_imdct unity_window,
      store_array_imdct (load_window_sym (window_array n window))
  | MDCT_VORBIS | IMDCT_VORBIS | NONE ->
      failwith "must specify transform kind"
  in

  (* 2*n locations: the longer side of the transform has 2*n elements. *)
  let locations = unique_array_c (2*n) in
  let input =
    load_input n
      (C.array_subscript iarray vistride)
      (C.array_subscript "BUG" vistride)
      locations
  in
  (* NOTE(review): [@@] combines two functions here, so it is presumably a
     composition operator brought in by [open Util] -- confirm. *)
  let output = (Complex.times (Complex.inverse_int !normalization))
      @@ (transform n input) in
  let odag =
    store_output n
      (C.array_subscript oarray vostride)
      (C.array_subscript "BUG" vostride)
      locations
      output
  in
  let annot = standard_optimizer odag in

  let tree =
    Fcn ("void", name,
         ([Decl (C.constrealtypep, iarray);
           Decl (C.realtypep, oarray)]
          (* stride parameters are emitted only when not fixed *)
          @ (if stride_fixed !uistride then []
             else [Decl (C.stridetype, istride)])
          @ (if stride_fixed !uostride then []
             else [Decl (C.stridetype, ostride)])
          @ (choose_simd []
               (if stride_fixed !uivstride then [] else
                [Decl ("int", sivs)]))
          @ (choose_simd []
               (if stride_fixed !uovstride then [] else
                [Decl ("int", sovs)]))
          (* the window array comes last, for windowed modes only *)
          @ (if (not (window_param mode)) then []
             else [Decl (C.constrealtypep, window)])
         ),
         finalize_fcn (Asch annot))

  in
  (unparse tree) ^ "\n"
|
cannam@127
|
249
|
cannam@127
|
250
|
cannam@127
|
(* Program driver: parse the command line, then print the codelet generated
   for the size returned by [check_size ()] and the selected mode. *)
let main () =
  parse speclist usage;
  print_string (generate (check_size ()) !mode)
|
cannam@127
|
256
|
cannam@127
|
(* Run the generator when the program starts. *)
let () = main ()
|