cannam@95: (* cannam@95: * Copyright (c) 1997-1999 Massachusetts Institute of Technology cannam@95: * Copyright (c) 2003, 2007-11 Matteo Frigo cannam@95: * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology cannam@95: * cannam@95: * This program is free software; you can redistribute it and/or modify cannam@95: * it under the terms of the GNU General Public License as published by cannam@95: * the Free Software Foundation; either version 2 of the License, or cannam@95: * (at your option) any later version. cannam@95: * cannam@95: * This program is distributed in the hope that it will be useful, cannam@95: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@95: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@95: * GNU General Public License for more details. cannam@95: * cannam@95: * You should have received a copy of the GNU General Public License cannam@95: * along with this program; if not, write to the Free Software cannam@95: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@95: * cannam@95: *) cannam@95: cannam@95: (* generation of trigonometric transforms *) cannam@95: cannam@95: open Util cannam@95: open Genutil cannam@95: open C cannam@95: cannam@95: cannam@95: let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " cannam@95: cannam@95: let uistride = ref Stride_variable cannam@95: let uostride = ref Stride_variable cannam@95: let uivstride = ref Stride_variable cannam@95: let uovstride = ref Stride_variable cannam@95: cannam@95: type mode = cannam@95: | RDFT cannam@95: | HDFT cannam@95: | DHT cannam@95: | REDFT00 cannam@95: | REDFT10 cannam@95: | REDFT01 cannam@95: | REDFT11 cannam@95: | RODFT00 cannam@95: | RODFT10 cannam@95: | RODFT01 cannam@95: | RODFT11 cannam@95: | NONE cannam@95: cannam@95: let mode = ref NONE cannam@95: let normsqr = ref 1 cannam@95: let unitary = ref false cannam@95: let noloop = ref false cannam@95: cannam@95: let speclist = [ cannam@95: "-with-istride", cannam@95: Arg.String(fun x -> uistride := arg_to_stride x), cannam@95: " specialize for given input stride"; cannam@95: cannam@95: "-with-ostride", cannam@95: Arg.String(fun x -> uostride := arg_to_stride x), cannam@95: " specialize for given output stride"; cannam@95: cannam@95: "-with-ivstride", cannam@95: Arg.String(fun x -> uivstride := arg_to_stride x), cannam@95: " specialize for given input vector stride"; cannam@95: cannam@95: "-with-ovstride", cannam@95: Arg.String(fun x -> uovstride := arg_to_stride x), cannam@95: " specialize for given output vector stride"; cannam@95: cannam@95: "-rdft", cannam@95: Arg.Unit(fun () -> mode := RDFT), cannam@95: " generate a real DFT codelet"; cannam@95: cannam@95: "-hdft", cannam@95: Arg.Unit(fun () -> mode := HDFT), cannam@95: " generate a Hermitian DFT codelet"; cannam@95: cannam@95: "-dht", cannam@95: Arg.Unit(fun () -> mode := DHT), cannam@95: " generate a DHT codelet"; cannam@95: cannam@95: "-redft00", cannam@95: Arg.Unit(fun () -> mode := REDFT00), cannam@95: " generate a DCT-I codelet"; cannam@95: cannam@95: "-redft10", cannam@95: Arg.Unit(fun () -> mode := REDFT10), cannam@95: " generate a DCT-II codelet"; cannam@95: cannam@95: "-redft01", cannam@95: Arg.Unit(fun () -> mode := REDFT01), cannam@95: " generate a DCT-III codelet"; cannam@95: cannam@95: "-redft11", cannam@95: Arg.Unit(fun () -> mode := REDFT11), cannam@95: " generate a DCT-IV codelet"; cannam@95: cannam@95: "-rodft00", cannam@95: Arg.Unit(fun () -> mode := RODFT00), cannam@95: " generate a DST-I codelet"; cannam@95: cannam@95: "-rodft10", cannam@95: Arg.Unit(fun () -> mode := RODFT10), cannam@95: " generate a DST-II codelet"; cannam@95: cannam@95: "-rodft01", cannam@95: Arg.Unit(fun () -> mode := RODFT01), cannam@95: " generate a DST-III codelet"; cannam@95: cannam@95: "-rodft11", cannam@95: Arg.Unit(fun () -> mode := RODFT11), cannam@95: " generate a DST-IV codelet"; cannam@95: cannam@95: "-normalization", cannam@95: Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix), cannam@95: " normalization integer to divide by"; cannam@95: cannam@95: "-normsqr", cannam@95: Arg.String(fun x -> normsqr := int_of_string x), cannam@95: " integer square of normalization to divide by"; cannam@95: cannam@95: "-unitary", cannam@95: Arg.Unit(fun () -> unitary := true), cannam@95: " unitary normalization (up overall scale factor)"; cannam@95: cannam@95: "-noloop", cannam@95: Arg.Unit(fun () -> noloop := true), cannam@95: " no vector loop"; cannam@95: ] cannam@95: cannam@95: let sqrt_half = Complex.inverse_int_sqrt 2 cannam@95: let sqrt_two = Complex.int_sqrt 2 cannam@95: cannam@95: let rescale sc s1 s2 input i = cannam@95: if ((i == s1 || i == s2) && !unitary) then cannam@95: Complex.times (input i) sc cannam@95: else cannam@95: input i cannam@95: cannam@95: let generate n mode = cannam@95: let iarray = "I" cannam@95: and oarray = "O" cannam@95: and istride = "is" cannam@95: and ostride = "os" cannam@95: and i = "i" cannam@95: and v = "v" cannam@95: in cannam@95: cannam@95: let sign = !Genutil.sign cannam@95: and name = !Magic.codelet_name in cannam@95: cannam@95: let vistride = either_stride (!uistride) (C.SVar istride) cannam@95: and vostride = either_stride (!uostride) (C.SVar ostride) cannam@95: in cannam@95: cannam@95: let sovs = stride_to_string "ovs" !uovstride in cannam@95: let sivs = stride_to_string "ivs" !uivstride in cannam@95: cannam@95: let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with cannam@95: | RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1 cannam@95: | HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *) cannam@95: | DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1 cannam@95: | REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1 cannam@95: | REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1 cannam@95: | REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1 cannam@95: | REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1 cannam@95: | RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1 cannam@95: | RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1 cannam@95: | RODFT01 -> Trig.dstIII, load_array_r, store_array_r, n-1,-1,-1,-1 cannam@95: | RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1 cannam@95: | _ -> failwith "must specify transform kind" cannam@95: in cannam@95: cannam@95: let locations = unique_array_c n in cannam@95: let input = locative_array_c n cannam@95: (C.array_subscript iarray vistride) cannam@95: (C.array_subscript "BUG" vistride) cannam@95: locations sivs in cannam@95: let output = rescale sqrt_half so1 so2 cannam@95: ((Complex.times (Complex.inverse_int_sqrt !normsqr)) cannam@95: @@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in cannam@95: let oloc = cannam@95: locative_array_c n cannam@95: (C.array_subscript oarray vostride) cannam@95: (C.array_subscript "BUG" vostride) cannam@95: locations sovs in cannam@95: let odag = store_output n oloc output in cannam@95: let annot = standard_optimizer odag in cannam@95: cannam@95: let body = if !noloop then Block([], [Asch annot]) else Block ( cannam@95: [Decl ("INT", i)], cannam@95: [For (Expr_assign (CVar i, CVar v), cannam@95: Binop (" > ", CVar i, Integer 0), cannam@95: list_to_comma cannam@95: [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]); cannam@95: Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]); cannam@95: Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]); cannam@95: make_volatile_stride (2*n) (CVar istride); cannam@95: make_volatile_stride (2*n) (CVar ostride) cannam@95: ], cannam@95: Asch annot) cannam@95: ]) cannam@95: in cannam@95: cannam@95: let tree = cannam@95: Fcn ((if !Magic.standalone then "void" else "static void"), name, cannam@95: ([Decl (C.constrealtypep, iarray); cannam@95: Decl (C.realtypep, oarray)] cannam@95: @ (if stride_fixed !uistride then [] cannam@95: else [Decl (C.stridetype, istride)]) cannam@95: @ (if stride_fixed !uostride then [] cannam@95: else [Decl (C.stridetype, ostride)]) cannam@95: @ (if !noloop then [] else cannam@95: [Decl ("INT", v)] cannam@95: @ (if stride_fixed !uivstride then [] cannam@95: else [Decl ("INT", "ivs")]) cannam@95: @ (if stride_fixed !uovstride then [] cannam@95: else [Decl ("INT", "ovs")]))), cannam@95: finalize_fcn body) cannam@95: cannam@95: in let desc = cannam@95: Printf.sprintf cannam@95: "static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n" cannam@95: n name (flops_of tree) cannam@95: (match mode with cannam@95: | RDFT -> "RDFT00" cannam@95: | HDFT -> "HDFT00" cannam@95: | DHT -> "DHT" cannam@95: | REDFT00 -> "REDFT00" cannam@95: | REDFT10 -> "REDFT10" cannam@95: | REDFT01 -> "REDFT01" cannam@95: | REDFT11 -> "REDFT11" cannam@95: | RODFT00 -> "RODFT00" cannam@95: | RODFT10 -> "RODFT10" cannam@95: | RODFT01 -> "RODFT01" cannam@95: | RODFT11 -> "RODFT11" cannam@95: | _ -> failwith "must specify a transform kind") cannam@95: cannam@95: and init = cannam@95: (declare_register_fcn name) ^ cannam@95: "{" ^ cannam@95: " X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^ cannam@95: "}\n" cannam@95: cannam@95: in cannam@95: (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init) cannam@95: cannam@95: cannam@95: let main () = cannam@95: begin cannam@95: parse speclist usage; cannam@95: print_string (generate (check_size ()) !mode); cannam@95: end cannam@95: cannam@95: let _ = main()