cannam@127: (* cannam@127: * Copyright (c) 1997-1999 Massachusetts Institute of Technology cannam@127: * Copyright (c) 2003, 2007-14 Matteo Frigo cannam@127: * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology cannam@127: * cannam@127: * This program is free software; you can redistribute it and/or modify cannam@127: * it under the terms of the GNU General Public License as published by cannam@127: * the Free Software Foundation; either version 2 of the License, or cannam@127: * (at your option) any later version. cannam@127: * cannam@127: * This program is distributed in the hope that it will be useful, cannam@127: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@127: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@127: * GNU General Public License for more details. cannam@127: * cannam@127: * You should have received a copy of the GNU General Public License cannam@127: * along with this program; if not, write to the Free Software cannam@127: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@127: * cannam@127: *) cannam@127: cannam@127: (* generation of trigonometric transforms *) cannam@127: cannam@127: open Util cannam@127: open Genutil cannam@127: open C cannam@127: cannam@127: cannam@127: let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " cannam@127: cannam@127: let uistride = ref Stride_variable cannam@127: let uostride = ref Stride_variable cannam@127: let uivstride = ref Stride_variable cannam@127: let uovstride = ref Stride_variable cannam@127: cannam@127: type mode = cannam@127: | RDFT cannam@127: | HDFT cannam@127: | DHT cannam@127: | REDFT00 cannam@127: | REDFT10 cannam@127: | REDFT01 cannam@127: | REDFT11 cannam@127: | RODFT00 cannam@127: | RODFT10 cannam@127: | RODFT01 cannam@127: | RODFT11 cannam@127: | NONE cannam@127: cannam@127: let mode = ref NONE cannam@127: let normsqr = ref 1 cannam@127: let unitary = ref false cannam@127: let noloop = ref false cannam@127: cannam@127: let speclist = [ cannam@127: "-with-istride", cannam@127: Arg.String(fun x -> uistride := arg_to_stride x), cannam@127: " specialize for given input stride"; cannam@127: cannam@127: "-with-ostride", cannam@127: Arg.String(fun x -> uostride := arg_to_stride x), cannam@127: " specialize for given output stride"; cannam@127: cannam@127: "-with-ivstride", cannam@127: Arg.String(fun x -> uivstride := arg_to_stride x), cannam@127: " specialize for given input vector stride"; cannam@127: cannam@127: "-with-ovstride", cannam@127: Arg.String(fun x -> uovstride := arg_to_stride x), cannam@127: " specialize for given output vector stride"; cannam@127: cannam@127: "-rdft", cannam@127: Arg.Unit(fun () -> mode := RDFT), cannam@127: " generate a real DFT codelet"; cannam@127: cannam@127: "-hdft", cannam@127: Arg.Unit(fun () -> mode := HDFT), cannam@127: " generate a Hermitian DFT codelet"; cannam@127: cannam@127: "-dht", cannam@127: Arg.Unit(fun () -> mode := DHT), cannam@127: " generate a DHT codelet"; cannam@127: cannam@127: "-redft00", cannam@127: Arg.Unit(fun () -> mode := REDFT00), cannam@127: " generate a DCT-I codelet"; cannam@127: cannam@127: "-redft10", cannam@127: Arg.Unit(fun () -> mode := REDFT10), cannam@127: " generate a DCT-II codelet"; cannam@127: cannam@127: "-redft01", cannam@127: Arg.Unit(fun () -> mode := REDFT01), cannam@127: " generate a DCT-III codelet"; cannam@127: cannam@127: "-redft11", cannam@127: Arg.Unit(fun () -> mode := REDFT11), cannam@127: " generate a DCT-IV codelet"; cannam@127: cannam@127: "-rodft00", cannam@127: Arg.Unit(fun () -> mode := RODFT00), cannam@127: " generate a DST-I codelet"; cannam@127: cannam@127: "-rodft10", cannam@127: Arg.Unit(fun () -> mode := RODFT10), cannam@127: " generate a DST-II codelet"; cannam@127: cannam@127: "-rodft01", cannam@127: Arg.Unit(fun () -> mode := RODFT01), cannam@127: " generate a DST-III codelet"; cannam@127: cannam@127: "-rodft11", cannam@127: Arg.Unit(fun () -> mode := RODFT11), cannam@127: " generate a DST-IV codelet"; cannam@127: cannam@127: "-normalization", cannam@127: Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix), cannam@127: " normalization integer to divide by"; cannam@127: cannam@127: "-normsqr", cannam@127: Arg.String(fun x -> normsqr := int_of_string x), cannam@127: " integer square of normalization to divide by"; cannam@127: cannam@127: "-unitary", cannam@127: Arg.Unit(fun () -> unitary := true), cannam@127: " unitary normalization (up overall scale factor)"; cannam@127: cannam@127: "-noloop", cannam@127: Arg.Unit(fun () -> noloop := true), cannam@127: " no vector loop"; cannam@127: ] cannam@127: cannam@127: let sqrt_half = Complex.inverse_int_sqrt 2 cannam@127: let sqrt_two = Complex.int_sqrt 2 cannam@127: cannam@127: let rescale sc s1 s2 input i = cannam@127: if ((i == s1 || i == s2) && !unitary) then cannam@127: Complex.times (input i) sc cannam@127: else cannam@127: input i cannam@127: cannam@127: let generate n mode = cannam@127: let iarray = "I" cannam@127: and oarray = "O" cannam@127: and istride = "is" cannam@127: and ostride = "os" cannam@127: and i = "i" cannam@127: and v = "v" cannam@127: in cannam@127: cannam@127: let sign = !Genutil.sign cannam@127: and name = !Magic.codelet_name in cannam@127: cannam@127: let vistride = either_stride (!uistride) (C.SVar istride) cannam@127: and vostride = either_stride (!uostride) (C.SVar ostride) cannam@127: in cannam@127: cannam@127: let sovs = stride_to_string "ovs" !uovstride in cannam@127: let sivs = stride_to_string "ivs" !uivstride in cannam@127: cannam@127: let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with cannam@127: | RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1 cannam@127: | HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *) cannam@127: | DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1 cannam@127: | REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1 cannam@127: | REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1 cannam@127: | REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1 cannam@127: | REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1 cannam@127: | RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1 cannam@127: | RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1 cannam@127: | RODFT01 -> Trig.dstIII, load_array_r, store_array_r, n-1,-1,-1,-1 cannam@127: | RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1 cannam@127: | _ -> failwith "must specify transform kind" cannam@127: in cannam@127: cannam@127: let locations = unique_array_c n in cannam@127: let input = locative_array_c n cannam@127: (C.array_subscript iarray vistride) cannam@127: (C.array_subscript "BUG" vistride) cannam@127: locations sivs in cannam@127: let output = rescale sqrt_half so1 so2 cannam@127: ((Complex.times (Complex.inverse_int_sqrt !normsqr)) cannam@127: @@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in cannam@127: let oloc = cannam@127: locative_array_c n cannam@127: (C.array_subscript oarray vostride) cannam@127: (C.array_subscript "BUG" vostride) cannam@127: locations sovs in cannam@127: let odag = store_output n oloc output in cannam@127: let annot = standard_optimizer odag in cannam@127: cannam@127: let body = if !noloop then Block([], [Asch annot]) else Block ( cannam@127: [Decl ("INT", i)], cannam@127: [For (Expr_assign (CVar i, CVar v), cannam@127: Binop (" > ", CVar i, Integer 0), cannam@127: list_to_comma cannam@127: [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]); cannam@127: Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]); cannam@127: Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]); cannam@127: make_volatile_stride (2*n) (CVar istride); cannam@127: make_volatile_stride (2*n) (CVar ostride) cannam@127: ], cannam@127: Asch annot) cannam@127: ]) cannam@127: in cannam@127: cannam@127: let tree = cannam@127: Fcn ((if !Magic.standalone then "void" else "static void"), name, cannam@127: ([Decl (C.constrealtypep, iarray); cannam@127: Decl (C.realtypep, oarray)] cannam@127: @ (if stride_fixed !uistride then [] cannam@127: else [Decl (C.stridetype, istride)]) cannam@127: @ (if stride_fixed !uostride then [] cannam@127: else [Decl (C.stridetype, ostride)]) cannam@127: @ (if !noloop then [] else cannam@127: [Decl ("INT", v)] cannam@127: @ (if stride_fixed !uivstride then [] cannam@127: else [Decl ("INT", "ivs")]) cannam@127: @ (if stride_fixed !uovstride then [] cannam@127: else [Decl ("INT", "ovs")]))), cannam@127: finalize_fcn body) cannam@127: cannam@127: in let desc = cannam@127: Printf.sprintf cannam@127: "static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n" cannam@127: n name (flops_of tree) cannam@127: (match mode with cannam@127: | RDFT -> "RDFT00" cannam@127: | HDFT -> "HDFT00" cannam@127: | DHT -> "DHT" cannam@127: | REDFT00 -> "REDFT00" cannam@127: | REDFT10 -> "REDFT10" cannam@127: | REDFT01 -> "REDFT01" cannam@127: | REDFT11 -> "REDFT11" cannam@127: | RODFT00 -> "RODFT00" cannam@127: | RODFT10 -> "RODFT10" cannam@127: | RODFT01 -> "RODFT01" cannam@127: | RODFT11 -> "RODFT11" cannam@127: | _ -> failwith "must specify a transform kind") cannam@127: cannam@127: and init = cannam@127: (declare_register_fcn name) ^ cannam@127: "{" ^ cannam@127: " X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^ cannam@127: "}\n" cannam@127: cannam@127: in cannam@127: (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init) cannam@127: cannam@127: cannam@127: let main () = cannam@127: begin cannam@127: parse speclist usage; cannam@127: print_string (generate (check_size ()) !mode); cannam@127: end cannam@127: cannam@127: let _ = main()