cannam@167: (* cannam@167: * Copyright (c) 1997-1999 Massachusetts Institute of Technology cannam@167: * Copyright (c) 2003, 2007-14 Matteo Frigo cannam@167: * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology cannam@167: * cannam@167: * This program is free software; you can redistribute it and/or modify cannam@167: * it under the terms of the GNU General Public License as published by cannam@167: * the Free Software Foundation; either version 2 of the License, or cannam@167: * (at your option) any later version. cannam@167: * cannam@167: * This program is distributed in the hope that it will be useful, cannam@167: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@167: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@167: * GNU General Public License for more details. cannam@167: * cannam@167: * You should have received a copy of the GNU General Public License cannam@167: * along with this program; if not, write to the Free Software cannam@167: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@167: * cannam@167: *) cannam@167: cannam@167: (* generation of trigonometric transforms *) cannam@167: cannam@167: open Util cannam@167: open Genutil cannam@167: open C cannam@167: cannam@167: cannam@167: let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " cannam@167: cannam@167: let uistride = ref Stride_variable cannam@167: let uostride = ref Stride_variable cannam@167: let uivstride = ref Stride_variable cannam@167: let uovstride = ref Stride_variable cannam@167: cannam@167: type mode = cannam@167: | RDFT cannam@167: | HDFT cannam@167: | DHT cannam@167: | REDFT00 cannam@167: | REDFT10 cannam@167: | REDFT01 cannam@167: | REDFT11 cannam@167: | RODFT00 cannam@167: | RODFT10 cannam@167: | RODFT01 cannam@167: | RODFT11 cannam@167: | NONE cannam@167: cannam@167: let mode = ref NONE cannam@167: let normsqr = ref 1 cannam@167: let unitary = ref false cannam@167: let noloop = ref false cannam@167: cannam@167: let speclist = [ cannam@167: "-with-istride", cannam@167: Arg.String(fun x -> uistride := arg_to_stride x), cannam@167: " specialize for given input stride"; cannam@167: cannam@167: "-with-ostride", cannam@167: Arg.String(fun x -> uostride := arg_to_stride x), cannam@167: " specialize for given output stride"; cannam@167: cannam@167: "-with-ivstride", cannam@167: Arg.String(fun x -> uivstride := arg_to_stride x), cannam@167: " specialize for given input vector stride"; cannam@167: cannam@167: "-with-ovstride", cannam@167: Arg.String(fun x -> uovstride := arg_to_stride x), cannam@167: " specialize for given output vector stride"; cannam@167: cannam@167: "-rdft", cannam@167: Arg.Unit(fun () -> mode := RDFT), cannam@167: " generate a real DFT codelet"; cannam@167: cannam@167: "-hdft", cannam@167: Arg.Unit(fun () -> mode := HDFT), cannam@167: " generate a Hermitian DFT codelet"; cannam@167: cannam@167: "-dht", cannam@167: Arg.Unit(fun () -> mode := DHT), cannam@167: " generate a DHT codelet"; cannam@167: cannam@167: "-redft00", cannam@167: Arg.Unit(fun () -> mode := REDFT00), cannam@167: " generate a DCT-I codelet"; cannam@167: cannam@167: "-redft10", cannam@167: Arg.Unit(fun () -> mode := REDFT10), cannam@167: " generate a DCT-II codelet"; cannam@167: cannam@167: "-redft01", cannam@167: Arg.Unit(fun () -> mode := REDFT01), cannam@167: " generate a DCT-III codelet"; cannam@167: cannam@167: "-redft11", cannam@167: Arg.Unit(fun () -> mode := REDFT11), cannam@167: " generate a DCT-IV codelet"; cannam@167: cannam@167: "-rodft00", cannam@167: Arg.Unit(fun () -> mode := RODFT00), cannam@167: " generate a DST-I codelet"; cannam@167: cannam@167: "-rodft10", cannam@167: Arg.Unit(fun () -> mode := RODFT10), cannam@167: " generate a DST-II codelet"; cannam@167: cannam@167: "-rodft01", cannam@167: Arg.Unit(fun () -> mode := RODFT01), cannam@167: " generate a DST-III codelet"; cannam@167: cannam@167: "-rodft11", cannam@167: Arg.Unit(fun () -> mode := RODFT11), cannam@167: " generate a DST-IV codelet"; cannam@167: cannam@167: "-normalization", cannam@167: Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix), cannam@167: " normalization integer to divide by"; cannam@167: cannam@167: "-normsqr", cannam@167: Arg.String(fun x -> normsqr := int_of_string x), cannam@167: " integer square of normalization to divide by"; cannam@167: cannam@167: "-unitary", cannam@167: Arg.Unit(fun () -> unitary := true), cannam@167: " unitary normalization (up overall scale factor)"; cannam@167: cannam@167: "-noloop", cannam@167: Arg.Unit(fun () -> noloop := true), cannam@167: " no vector loop"; cannam@167: ] cannam@167: cannam@167: let sqrt_half = Complex.inverse_int_sqrt 2 cannam@167: let sqrt_two = Complex.int_sqrt 2 cannam@167: cannam@167: let rescale sc s1 s2 input i = cannam@167: if ((i == s1 || i == s2) && !unitary) then cannam@167: Complex.times (input i) sc cannam@167: else cannam@167: input i cannam@167: cannam@167: let generate n mode = cannam@167: let iarray = "I" cannam@167: and oarray = "O" cannam@167: and istride = "is" cannam@167: and ostride = "os" cannam@167: and i = "i" cannam@167: and v = "v" cannam@167: in cannam@167: cannam@167: let sign = !Genutil.sign cannam@167: and name = !Magic.codelet_name in cannam@167: cannam@167: let vistride = either_stride (!uistride) (C.SVar istride) cannam@167: and vostride = either_stride (!uostride) (C.SVar ostride) cannam@167: in cannam@167: cannam@167: let sovs = stride_to_string "ovs" !uovstride in cannam@167: let sivs = stride_to_string "ivs" !uivstride in cannam@167: cannam@167: let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with cannam@167: | RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1 cannam@167: | HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *) cannam@167: | DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1 cannam@167: | REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1 cannam@167: | REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1 cannam@167: | REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1 cannam@167: | REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1 cannam@167: | RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1 cannam@167: | RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1 cannam@167: | RODFT01 -> Trig.dstIII, load_array_r, store_array_r, n-1,-1,-1,-1 cannam@167: | RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1 cannam@167: | _ -> failwith "must specify transform kind" cannam@167: in cannam@167: cannam@167: let locations = unique_array_c n in cannam@167: let input = locative_array_c n cannam@167: (C.array_subscript iarray vistride) cannam@167: (C.array_subscript "BUG" vistride) cannam@167: locations sivs in cannam@167: let output = rescale sqrt_half so1 so2 cannam@167: ((Complex.times (Complex.inverse_int_sqrt !normsqr)) cannam@167: @@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in cannam@167: let oloc = cannam@167: locative_array_c n cannam@167: (C.array_subscript oarray vostride) cannam@167: (C.array_subscript "BUG" vostride) cannam@167: locations sovs in cannam@167: let odag = store_output n oloc output in cannam@167: let annot = standard_optimizer odag in cannam@167: cannam@167: let body = if !noloop then Block([], [Asch annot]) else Block ( cannam@167: [Decl ("INT", i)], cannam@167: [For (Expr_assign (CVar i, CVar v), cannam@167: Binop (" > ", CVar i, Integer 0), cannam@167: list_to_comma cannam@167: [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]); cannam@167: Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]); cannam@167: Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]); cannam@167: make_volatile_stride (2*n) (CVar istride); cannam@167: make_volatile_stride (2*n) (CVar ostride) cannam@167: ], cannam@167: Asch annot) cannam@167: ]) cannam@167: in cannam@167: cannam@167: let tree = cannam@167: Fcn ((if !Magic.standalone then "void" else "static void"), name, cannam@167: ([Decl (C.constrealtypep, iarray); cannam@167: Decl (C.realtypep, oarray)] cannam@167: @ (if stride_fixed !uistride then [] cannam@167: else [Decl (C.stridetype, istride)]) cannam@167: @ (if stride_fixed !uostride then [] cannam@167: else [Decl (C.stridetype, ostride)]) cannam@167: @ (if !noloop then [] else cannam@167: [Decl ("INT", v)] cannam@167: @ (if stride_fixed !uivstride then [] cannam@167: else [Decl ("INT", "ivs")]) cannam@167: @ (if stride_fixed !uovstride then [] cannam@167: else [Decl ("INT", "ovs")]))), cannam@167: finalize_fcn body) cannam@167: cannam@167: in let desc = cannam@167: Printf.sprintf cannam@167: "static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n" cannam@167: n name (flops_of tree) cannam@167: (match mode with cannam@167: | RDFT -> "RDFT00" cannam@167: | HDFT -> "HDFT00" cannam@167: | DHT -> "DHT" cannam@167: | REDFT00 -> "REDFT00" cannam@167: | REDFT10 -> "REDFT10" cannam@167: | REDFT01 -> "REDFT01" cannam@167: | REDFT11 -> "REDFT11" cannam@167: | RODFT00 -> "RODFT00" cannam@167: | RODFT10 -> "RODFT10" cannam@167: | RODFT01 -> "RODFT01" cannam@167: | RODFT11 -> "RODFT11" cannam@167: | _ -> failwith "must specify a transform kind") cannam@167: cannam@167: and init = cannam@167: (declare_register_fcn name) ^ cannam@167: "{" ^ cannam@167: " X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^ cannam@167: "}\n" cannam@167: cannam@167: in cannam@167: (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init) cannam@167: cannam@167: cannam@167: let main () = cannam@167: begin cannam@167: parse speclist usage; cannam@167: print_string (generate (check_size ()) !mode); cannam@167: end cannam@167: cannam@167: let _ = main()