annotate src/fftw-3.3.8/genfft/gen_hc2c.ml @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 (*
Chris@82 2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
Chris@82 3 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 4 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 5 *
Chris@82 6 * This program is free software; you can redistribute it and/or modify
Chris@82 7 * it under the terms of the GNU General Public License as published by
Chris@82 8 * the Free Software Foundation; either version 2 of the License, or
Chris@82 9 * (at your option) any later version.
Chris@82 10 *
Chris@82 11 * This program is distributed in the hope that it will be useful,
Chris@82 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 14 * GNU General Public License for more details.
Chris@82 15 *
Chris@82 16 * You should have received a copy of the GNU General Public License
Chris@82 17 * along with this program; if not, write to the Free Software
Chris@82 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 19 *
Chris@82 20 *)
Chris@82 21
Chris@82 22 open Util
Chris@82 23 open Genutil
Chris@82 24 open C
Chris@82 25
Chris@82 26
Chris@82 27 type ditdif = DIT | DIF
Chris@82 28 let ditdif = ref DIT
Chris@82 29 let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
Chris@82 30
Chris@82 31 let urs = ref Stride_variable
Chris@82 32
Chris@82 33 let speclist = [
Chris@82 34 "-dit",
Chris@82 35 Arg.Unit(fun () -> ditdif := DIT),
Chris@82 36 " generate a DIT codelet";
Chris@82 37
Chris@82 38 "-dif",
Chris@82 39 Arg.Unit(fun () -> ditdif := DIF),
Chris@82 40 " generate a DIF codelet";
Chris@82 41
Chris@82 42 "-with-rs",
Chris@82 43 Arg.String(fun x -> urs := arg_to_stride x),
Chris@82 44 " specialize for given R-stride";
Chris@82 45 ]
Chris@82 46
Chris@82 47 let byi = Complex.times Complex.i
Chris@82 48 let byui = Complex.times (Complex.uminus Complex.i)
Chris@82 49
Chris@82 50 let sym n f i = if (i < n - i) then f i else Complex.conj (f i)
Chris@82 51
Chris@82 52 let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
Chris@82 53
Chris@82 54 let generate n =
Chris@82 55 let rs = "rs"
Chris@82 56 and twarray = "W"
Chris@82 57 and m = "m" and mb = "mb" and me = "me" and ms = "ms"
Chris@82 58
Chris@82 59 (* the array names are from the point of view of the complex array
Chris@82 60 (output in R2C, input in C2R) *)
Chris@82 61 and arp = "Rp" (* real, positive *)
Chris@82 62 and aip = "Ip" (* imag, positive *)
Chris@82 63 and arm = "Rm" (* real, negative *)
Chris@82 64 and aim = "Im" (* imag, negative *)
Chris@82 65
Chris@82 66 in
Chris@82 67
Chris@82 68 let sign = !Genutil.sign
Chris@82 69 and name = !Magic.codelet_name
Chris@82 70 and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
Chris@82 71
Chris@82 72 let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
Chris@82 73 let nt = num_twiddles n in
Chris@82 74
Chris@82 75 let byw = bytwiddle n sign (twiddle_array nt twarray) in
Chris@82 76
Chris@82 77 let vrs = either_stride (!urs) (C.SVar rs) in
Chris@82 78
Chris@82 79 (* assume a single location. No point in doing alias analysis *)
Chris@82 80 let the_location = (Unique.make (), Unique.make ()) in
Chris@82 81 let locations _ = the_location in
Chris@82 82
Chris@82 83 let locr = (locative_array_c n
Chris@82 84 (C.array_subscript arp vrs)
Chris@82 85 (C.array_subscript arm vrs)
Chris@82 86 locations "BUG")
Chris@82 87 and loci = (locative_array_c n
Chris@82 88 (C.array_subscript aip vrs)
Chris@82 89 (C.array_subscript aim vrs)
Chris@82 90 locations "BUG")
Chris@82 91 and locp = (locative_array_c n
Chris@82 92 (C.array_subscript arp vrs)
Chris@82 93 (C.array_subscript aip vrs)
Chris@82 94 locations "BUG")
Chris@82 95 and locm = (locative_array_c n
Chris@82 96 (C.array_subscript arm vrs)
Chris@82 97 (C.array_subscript aim vrs)
Chris@82 98 locations "BUG")
Chris@82 99 in
Chris@82 100 let locri i = if i mod 2 == 0 then locr (i/2) else loci ((i-1)/2)
Chris@82 101 and locpm i = if i < n - i then locp i else locm (n-1-i)
Chris@82 102 in
Chris@82 103
Chris@82 104 let asch =
Chris@82 105 match !ditdif with
Chris@82 106 | DIT ->
Chris@82 107 let output = Fft.dft sign n (byw (load_array_c n locri)) in
Chris@82 108 let odag = store_array_c n locpm (sym n output) in
Chris@82 109 standard_optimizer odag
Chris@82 110
Chris@82 111 | DIF ->
Chris@82 112 let output = byw (Fft.dft sign n (sym n (load_array_c n locpm))) in
Chris@82 113 let odag = store_array_c n locri output in
Chris@82 114 standard_optimizer odag
Chris@82 115 in
Chris@82 116
Chris@82 117 let vms = CVar "ms"
Chris@82 118 and varp = CVar arp
Chris@82 119 and vaip = CVar aip
Chris@82 120 and varm = CVar arm
Chris@82 121 and vaim = CVar aim
Chris@82 122 and vm = CVar m and vmb = CVar mb and vme = CVar me
Chris@82 123 in
Chris@82 124 let body = Block (
Chris@82 125 [Decl ("INT", m)],
Chris@82 126 [For (list_to_comma
Chris@82 127 [Expr_assign (vm, vmb);
Chris@82 128 Expr_assign (CVar twarray,
Chris@82 129 CPlus [CVar twarray;
Chris@82 130 ctimes (CPlus [vmb; CUminus (Integer 1)],
Chris@82 131 Integer nt)])],
Chris@82 132 Binop (" < ", vm, vme),
Chris@82 133 list_to_comma
Chris@82 134 [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Chris@82 135 Expr_assign (varp, CPlus [varp; byvl vms]);
Chris@82 136 Expr_assign (vaip, CPlus [vaip; byvl vms]);
Chris@82 137 Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
Chris@82 138 Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
Chris@82 139 Expr_assign (CVar twarray, CPlus [CVar twarray;
Chris@82 140 byvl (Integer nt)]);
Chris@82 141 make_volatile_stride (4*n) (CVar rs)
Chris@82 142 ],
Chris@82 143 Asch asch)])
Chris@82 144 in
Chris@82 145
Chris@82 146 let tree =
Chris@82 147 Fcn ("static void", name,
Chris@82 148 [Decl (C.realtypep, arp);
Chris@82 149 Decl (C.realtypep, aip);
Chris@82 150 Decl (C.realtypep, arm);
Chris@82 151 Decl (C.realtypep, aim);
Chris@82 152 Decl (C.constrealtypep, twarray);
Chris@82 153 Decl (C.stridetype, rs);
Chris@82 154 Decl ("INT", mb);
Chris@82 155 Decl ("INT", me);
Chris@82 156 Decl ("INT", ms)],
Chris@82 157 finalize_fcn body)
Chris@82 158 in
Chris@82 159 let twinstr =
Chris@82 160 Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Chris@82 161 (twinstr_to_string "VL" (twdesc n))
Chris@82 162 and desc =
Chris@82 163 Printf.sprintf
Chris@82 164 "static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
Chris@82 165 n name (flops_of tree)
Chris@82 166 and register = "X(khc2c_register)"
Chris@82 167
Chris@82 168 in
Chris@82 169 let init =
Chris@82 170 "\n" ^
Chris@82 171 twinstr ^
Chris@82 172 desc ^
Chris@82 173 (declare_register_fcn name) ^
Chris@82 174 (Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_RDFT);\n}" register name)
Chris@82 175 in
Chris@82 176
Chris@82 177 (unparse tree) ^ "\n" ^ init
Chris@82 178
Chris@82 179
Chris@82 180 let main () =
Chris@82 181 begin
Chris@82 182 parse (speclist @ Twiddle.speclist) usage;
Chris@82 183 print_string (generate (check_size ()));
Chris@82 184 end
Chris@82 185
Chris@82 186 let _ = main()