annotate src/fftw-3.3.5/genfft/fft.ml @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 (*
Chris@42 2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
Chris@42 3 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 4 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 5 *
Chris@42 6 * This program is free software; you can redistribute it and/or modify
Chris@42 7 * it under the terms of the GNU General Public License as published by
Chris@42 8 * the Free Software Foundation; either version 2 of the License, or
Chris@42 9 * (at your option) any later version.
Chris@42 10 *
Chris@42 11 * This program is distributed in the hope that it will be useful,
Chris@42 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 14 * GNU General Public License for more details.
Chris@42 15 *
Chris@42 16 * You should have received a copy of the GNU General Public License
Chris@42 17 * along with this program; if not, write to the Free Software
Chris@42 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 19 *
Chris@42 20 *)
Chris@42 21
Chris@42 22
Chris@42 23 (* This is the part of the generator that actually computes the FFT
Chris@42 24 in symbolic form *)
Chris@42 25
Chris@42 26 open Complex
Chris@42 27 open Util
Chris@42 28
Chris@42 29 (* choose a suitable factor of n *)
Chris@42 30 let choose_factor n =
Chris@42 31 (* first choice: i such that gcd(i, n / i) = 1, i as big as possible *)
Chris@42 32 let choose1 n =
Chris@42 33 let rec loop i f =
Chris@42 34 if (i * i > n) then f
Chris@42 35 else if ((n mod i) == 0 && gcd i (n / i) == 1) then loop (i + 1) i
Chris@42 36 else loop (i + 1) f
Chris@42 37 in loop 1 1
Chris@42 38
Chris@42 39 (* second choice: the biggest factor i of n, where i < sqrt(n), if any *)
Chris@42 40 and choose2 n =
Chris@42 41 let rec loop i f =
Chris@42 42 if (i * i > n) then f
Chris@42 43 else if ((n mod i) == 0) then loop (i + 1) i
Chris@42 44 else loop (i + 1) f
Chris@42 45 in loop 1 1
Chris@42 46
Chris@42 47 in let i = choose1 n in
Chris@42 48 if (i > 1) then i
Chris@42 49 else choose2 n
Chris@42 50
Chris@42 51 let is_power_of_two n = (n > 0) && ((n - 1) land n == 0)
Chris@42 52
Chris@42 53 let rec dft_prime sign n input =
Chris@42 54 let sum filter i =
Chris@42 55 sigma 0 n (fun j ->
Chris@42 56 let coeff = filter (exp n (sign * i * j))
Chris@42 57 in coeff @* (input j)) in
Chris@42 58 let computation_even = array n (sum identity)
Chris@42 59 and computation_odd =
Chris@42 60 let sumr = array n (sum real)
Chris@42 61 and sumi = array n (sum ((times Complex.i) @@ imag)) in
Chris@42 62 array n (fun i ->
Chris@42 63 if (i = 0) then
Chris@42 64 (* expose some common subexpressions *)
Chris@42 65 input 0 @+
Chris@42 66 sigma 1 ((n + 1) / 2) (fun j -> input j @+ input (n - j))
Chris@42 67 else
Chris@42 68 let i' = min i (n - i) in
Chris@42 69 if (i < n - i) then
Chris@42 70 sumr i' @+ sumi i'
Chris@42 71 else
Chris@42 72 sumr i' @- sumi i') in
Chris@42 73 if (n >= !Magic.rader_min) then
Chris@42 74 dft_rader sign n input
Chris@42 75 else if (n == 2) then
Chris@42 76 computation_even
Chris@42 77 else
Chris@42 78 computation_odd
Chris@42 79
Chris@42 80
Chris@42 81 and dft_rader sign p input =
Chris@42 82 let half =
Chris@42 83 let one_half = inverse_int 2 in
Chris@42 84 times one_half
Chris@42 85
Chris@42 86 and make_product n a b =
Chris@42 87 let scale_factor = inverse_int n in
Chris@42 88 array n (fun i -> a i @* (scale_factor @* b i)) in
Chris@42 89
Chris@42 90 (* generates a convolution using ffts. (all arguments are the
Chris@42 91 same as to gen_convolution, below) *)
Chris@42 92 let gen_convolution_by_fft n a b addtoall =
Chris@42 93 let fft_a = dft 1 n a
Chris@42 94 and fft_b = dft 1 n b in
Chris@42 95
Chris@42 96 let fft_ab = make_product n fft_a fft_b
Chris@42 97 and dc_term i = if (i == 0) then addtoall else zero in
Chris@42 98
Chris@42 99 let fft_ab1 = array n (fun i -> fft_ab i @+ dc_term i)
Chris@42 100 and sum = fft_a 0 in
Chris@42 101 let conv = dft (-1) n fft_ab1 in
Chris@42 102 (sum, conv)
Chris@42 103
Chris@42 104 (* alternate routine for convolution. Seems to work better for
Chris@42 105 small sizes. I have no idea why. *)
Chris@42 106 and gen_convolution_by_fft_alt n a b addtoall =
Chris@42 107 let ap = array n (fun i -> half (a i @+ a ((n - i) mod n)))
Chris@42 108 and am = array n (fun i -> half (a i @- a ((n - i) mod n)))
Chris@42 109 and bp = array n (fun i -> half (b i @+ b ((n - i) mod n)))
Chris@42 110 and bm = array n (fun i -> half (b i @- b ((n - i) mod n)))
Chris@42 111 in
Chris@42 112
Chris@42 113 let fft_ap = dft 1 n ap
Chris@42 114 and fft_am = dft 1 n am
Chris@42 115 and fft_bp = dft 1 n bp
Chris@42 116 and fft_bm = dft 1 n bm in
Chris@42 117
Chris@42 118 let fft_abpp = make_product n fft_ap fft_bp
Chris@42 119 and fft_abpm = make_product n fft_ap fft_bm
Chris@42 120 and fft_abmp = make_product n fft_am fft_bp
Chris@42 121 and fft_abmm = make_product n fft_am fft_bm
Chris@42 122 and sum = fft_ap 0 @+ fft_am 0
Chris@42 123 and dc_term i = if (i == 0) then addtoall else zero in
Chris@42 124
Chris@42 125 let fft_ab1 = array n (fun i -> (fft_abpp i @+ fft_abmm i) @+ dc_term i)
Chris@42 126 and fft_ab2 = array n (fun i -> fft_abpm i @+ fft_abmp i) in
Chris@42 127 let conv1 = dft (-1) n fft_ab1
Chris@42 128 and conv2 = dft (-1) n fft_ab2 in
Chris@42 129 let conv = array n (fun i ->
Chris@42 130 conv1 i @+ conv2 i) in
Chris@42 131 (sum, conv)
Chris@42 132
Chris@42 133 (* generator of assignment list assigning conv to the convolution of
Chris@42 134 a and b, all of which are of length n. addtoall is added to
Chris@42 135 all of the elements of the result. Returns (sum, convolution) pair
Chris@42 136 where sum is the sum of the elements of a. *)
Chris@42 137
Chris@42 138 in let gen_convolution =
Chris@42 139 if (p <= !Magic.alternate_convolution) then
Chris@42 140 gen_convolution_by_fft_alt
Chris@42 141 else
Chris@42 142 gen_convolution_by_fft
Chris@42 143
Chris@42 144 (* fft generator for prime n = p using Rader's algorithm for
Chris@42 145 turning the fft into a convolution, which then can be
Chris@42 146 performed in a variety of ways *)
Chris@42 147 in
Chris@42 148 let g = find_generator p in
Chris@42 149 let ginv = pow_mod g (p - 2) p in
Chris@42 150 let input_perm = array p (fun i -> input (pow_mod g i p))
Chris@42 151 and omega_perm = array p (fun i -> exp p (sign * (pow_mod ginv i p)))
Chris@42 152 and output_perm = array p (fun i -> pow_mod ginv i p)
Chris@42 153 in let (sum, conv) =
Chris@42 154 (gen_convolution (p - 1) input_perm omega_perm (input 0))
Chris@42 155 in array p (fun i ->
Chris@42 156 if (i = 0) then
Chris@42 157 input 0 @+ sum
Chris@42 158 else
Chris@42 159 let i' = suchthat 0 (fun i' -> i = output_perm i')
Chris@42 160 in conv i')
Chris@42 161
Chris@42 162 (* our modified version of the conjugate-pair split-radix algorithm,
Chris@42 163 which reduces the number of multiplications by rescaling the
Chris@42 164 sub-transforms (power-of-two n's only) *)
Chris@42 165 and newsplit sign n input =
Chris@42 166 let rec s n k = (* recursive scale factor *)
Chris@42 167 if n <= 4 then
Chris@42 168 one
Chris@42 169 else
Chris@42 170 let k4 = (abs k) mod (n / 4) in
Chris@42 171 let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
Chris@42 172 (s (n / 4) k4') @* (real (exp n k4'))
Chris@42 173
Chris@42 174 and sinv n k = (* 1 / s(n,k) *)
Chris@42 175 if n <= 4 then
Chris@42 176 one
Chris@42 177 else
Chris@42 178 let k4 = (abs k) mod (n / 4) in
Chris@42 179 let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
Chris@42 180 (sinv (n / 4) k4') @* (sec n k4')
Chris@42 181
Chris@42 182 in let sdiv2 n k = (s n k) @* (sinv (2*n) k) (* s(n,k) / s(2*n,k) *)
Chris@42 183 and sdiv4 n k = (* s(n,k) / s(4*n,k) *)
Chris@42 184 let k4 = (abs k) mod n in
Chris@42 185 sec (4*n) (if k4 <= (n / 2) then k4 else (n - k4))
Chris@42 186
Chris@42 187 in let t n k = (exp n k) @* (sdiv4 (n/4) k)
Chris@42 188
Chris@42 189 and dft1 input = input
Chris@42 190 and dft2 input = array 2 (fun k -> (input 0) @+ ((input 1) @* exp 2 k))
Chris@42 191
Chris@42 192 in let rec newsplit0 sign n input =
Chris@42 193 if (n == 1) then dft1 input
Chris@42 194 else if (n == 2) then dft2 input
Chris@42 195 else let u = newsplit0 sign (n / 2) (fun i -> input (i*2))
Chris@42 196 and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
Chris@42 197 and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n))
Chris@42 198 and twid = array n (fun k -> s (n/4) k @* exp n (sign * k)) in
Chris@42 199 let w = array n (fun k -> twid k @* z (k mod (n / 4)))
Chris@42 200 and w' = array n (fun k -> conj (twid k) @* z' (k mod (n / 4))) in
Chris@42 201 let ww = array n (fun k -> w k @+ w' k) in
Chris@42 202 array n (fun k -> u (k mod (n / 2)) @+ ww k)
Chris@42 203
Chris@42 204 and newsplitS sign n input =
Chris@42 205 if (n == 1) then dft1 input
Chris@42 206 else if (n == 2) then dft2 input
Chris@42 207 else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
Chris@42 208 and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
Chris@42 209 and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
Chris@42 210 let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
Chris@42 211 and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
Chris@42 212 let ww = array n (fun k -> w k @+ w' k) in
Chris@42 213 array n (fun k -> u (k mod (n / 2)) @+ ww k)
Chris@42 214
Chris@42 215 and newsplitS2 sign n input =
Chris@42 216 if (n == 1) then dft1 input
Chris@42 217 else if (n == 2) then dft2 input
Chris@42 218 else let u = newsplitS4 sign (n / 2) (fun i -> input (i*2))
Chris@42 219 and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
Chris@42 220 and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
Chris@42 221 let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
Chris@42 222 and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
Chris@42 223 let ww = array n (fun k -> (w k @+ w' k) @* (sdiv2 n k)) in
Chris@42 224 array n (fun k -> u (k mod (n / 2)) @+ ww k)
Chris@42 225
Chris@42 226 and newsplitS4 sign n input =
Chris@42 227 if (n == 1) then dft1 input
Chris@42 228 else if (n == 2) then
Chris@42 229 let f = dft2 input
Chris@42 230 in array 2 (fun k -> (f k) @* (sinv 8 k))
Chris@42 231 else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
Chris@42 232 and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
Chris@42 233 and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
Chris@42 234 let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
Chris@42 235 and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
Chris@42 236 let ww = array n (fun k -> w k @+ w' k) in
Chris@42 237 array n (fun k -> (u (k mod (n / 2)) @+ ww k) @* (sdiv4 n k))
Chris@42 238
Chris@42 239 in newsplit0 sign n input
Chris@42 240
Chris@42 241 and dft sign n input =
Chris@42 242 let rec cooley_tukey sign n1 n2 input =
Chris@42 243 let tmp1 =
Chris@42 244 array n2 (fun i2 ->
Chris@42 245 dft sign n1 (fun i1 -> input (i1 * n2 + i2))) in
Chris@42 246 let tmp2 =
Chris@42 247 array n1 (fun i1 ->
Chris@42 248 array n2 (fun i2 ->
Chris@42 249 exp n (sign * i1 * i2) @* tmp1 i2 i1)) in
Chris@42 250 let tmp3 = array n1 (fun i1 -> dft sign n2 (tmp2 i1)) in
Chris@42 251 (fun i -> tmp3 (i mod n1) (i / n1))
Chris@42 252
Chris@42 253 (*
Chris@42 254 * This is "exponent -1" split-radix by Dan Bernstein.
Chris@42 255 *)
Chris@42 256 and split_radix_dit sign n input =
Chris@42 257 let f0 = dft sign (n / 2) (fun i -> input (i * 2))
Chris@42 258 and f10 = dft sign (n / 4) (fun i -> input (i * 4 + 1))
Chris@42 259 and f11 = dft sign (n / 4) (fun i -> input ((n + i * 4 - 1) mod n)) in
Chris@42 260 let g10 = array n (fun k ->
Chris@42 261 exp n (sign * k) @* f10 (k mod (n / 4)))
Chris@42 262 and g11 = array n (fun k ->
Chris@42 263 exp n (- sign * k) @* f11 (k mod (n / 4))) in
Chris@42 264 let g1 = array n (fun k -> g10 k @+ g11 k) in
Chris@42 265 array n (fun k -> f0 (k mod (n / 2)) @+ g1 k)
Chris@42 266
Chris@42 267 and split_radix_dif sign n input =
Chris@42 268 let n2 = n / 2 and n4 = n / 4 in
Chris@42 269 let x0 = array n2 (fun i -> input i @+ input (i + n2))
Chris@42 270 and x10 = array n4 (fun i -> input i @- input (i + n2))
Chris@42 271 and x11 = array n4 (fun i ->
Chris@42 272 input (i + n4) @- input (i + n2 + n4)) in
Chris@42 273 let x1 k i =
Chris@42 274 exp n (k * i * sign) @* (x10 i @+ exp 4 (k * sign) @* x11 i) in
Chris@42 275 let f0 = dft sign n2 x0
Chris@42 276 and f1 = array 4 (fun k -> dft sign n4 (x1 k)) in
Chris@42 277 array n (fun k ->
Chris@42 278 if k mod 2 = 0 then f0 (k / 2)
Chris@42 279 else let k' = k mod 4 in f1 k' ((k - k') / 4))
Chris@42 280
Chris@42 281 and prime_factor sign n1 n2 input =
Chris@42 282 let tmp1 = array n2 (fun i2 ->
Chris@42 283 dft sign n1 (fun i1 -> input ((i1 * n2 + i2 * n1) mod n)))
Chris@42 284 in let tmp2 = array n1 (fun i1 ->
Chris@42 285 dft sign n2 (fun k2 -> tmp1 k2 i1))
Chris@42 286 in fun i -> tmp2 (i mod n1) (i mod n2)
Chris@42 287
Chris@42 288 in let algorithm sign n =
Chris@42 289 let r = choose_factor n in
Chris@42 290 if List.mem n !Magic.rader_list then
Chris@42 291 (* special cases *)
Chris@42 292 dft_rader sign n
Chris@42 293 else if (r == 1) then (* n is prime *)
Chris@42 294 dft_prime sign n
Chris@42 295 else if (gcd r (n / r)) == 1 then
Chris@42 296 prime_factor sign r (n / r)
Chris@42 297 else if (n mod 4 = 0 && n > 4) then
Chris@42 298 if !Magic.newsplit && is_power_of_two n then
Chris@42 299 newsplit sign n
Chris@42 300 else if !Magic.dif_split_radix then
Chris@42 301 split_radix_dif sign n
Chris@42 302 else
Chris@42 303 split_radix_dit sign n
Chris@42 304 else
Chris@42 305 cooley_tukey sign r (n / r)
Chris@42 306 in
Chris@42 307 array n (algorithm sign n input)