annotate fft/fftw/fftw-3.3.4/genfft/fft.ml @ 40:223f770b5341 kissfft-double tip

Try a double-precision kissfft
author Chris Cannam
date Wed, 07 Sep 2016 10:40:32 +0100
parents 26056e866c29
children
rev   line source
Chris@19 1 (*
Chris@19 2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
Chris@19 3 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@19 4 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@19 5 *
Chris@19 6 * This program is free software; you can redistribute it and/or modify
Chris@19 7 * it under the terms of the GNU General Public License as published by
Chris@19 8 * the Free Software Foundation; either version 2 of the License, or
Chris@19 9 * (at your option) any later version.
Chris@19 10 *
Chris@19 11 * This program is distributed in the hope that it will be useful,
Chris@19 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@19 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@19 14 * GNU General Public License for more details.
Chris@19 15 *
Chris@19 16 * You should have received a copy of the GNU General Public License
Chris@19 17 * along with this program; if not, write to the Free Software
Chris@19 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@19 19 *
Chris@19 20 *)
Chris@19 21
Chris@19 22
Chris@19 23 (* This is the part of the generator that actually computes the FFT
Chris@19 24 in symbolic form *)
Chris@19 25
Chris@19 26 open Complex
Chris@19 27 open Util
Chris@19 28
(* choose a suitable factor of n.

   Two candidates are searched, both among the divisors i of n with
   i * i <= n:
     - first choice: the largest i such that gcd(i, n / i) = 1;
     - second choice: the largest i, whatever its gcd with n / i.
   The first choice is used whenever it is greater than 1.  The result
   is 1 when no divisor in 2 .. sqrt(n) exists (in particular when n is
   prime, or when n <= 1 and the search loop never runs). *)
let choose_factor n =
  (* largest divisor i of n with i * i <= n for which [accept i] holds;
     i = 1 is the fallback when nothing bigger qualifies *)
  let largest_divisor accept =
    let rec loop i best =
      if i * i > n then best
      else if n mod i = 0 && accept i then loop (i + 1) i
      else loop (i + 1) best
    in
    loop 1 1
  in
  let coprime_choice = largest_divisor (fun i -> gcd i (n / i) = 1) in
  if coprime_choice > 1 then coprime_choice
  else largest_divisor (fun _ -> true)
Chris@19 50
(* [is_power_of_two n] holds exactly when n is positive and has a single
   bit set: clearing the lowest set bit with (n - 1) land n then leaves 0.
   Integers are compared with structural [=], as elsewhere in this file. *)
let is_power_of_two n = (n > 0) && ((n - 1) land n = 0)
Chris@19 52
(* [dft_prime sign n input] builds the size-n transform for sizes that
   [dft] could not factor (it is called there when [choose_factor]
   returns 1).  [input] maps an index to a symbolic value; the result is
   again an index -> value function.

   Strategy, decided at the bottom of the function:
     - n >= !Magic.rader_min : delegate to [dft_rader];
     - n = 2                 : plain sum over all inputs;
     - otherwise             : odd-size formulation in which outputs i
                               and n - i share the same two partial sums
                               and differ only in the sign between them.

   NOTE(review): [sigma], [array], [exp], [real], [imag], [times] and
   [identity] come from Util/Complex, which are not visible here; the
   description above assumes [sigma a b f] sums f over a .. b-1 and
   [exp n k] is the k-th power of the primitive n-th root of unity --
   confirm against those modules. *)
let rec dft_prime sign n input =
  (* output i as a sum over all inputs, with each twiddle factor first
     passed through [filter] *)
  let sum filter i =
    sigma 0 n (fun j ->
      let coeff = filter (exp n (sign * i * j))
      in coeff @* (input j)) in
  let computation_even = array n (sum identity)
  and computation_odd =
    (* [@@] must be Util's function composition here (times Complex.i
       applied after imag), not Stdlib's application operator -- TODO
       confirm in Util *)
    let sumr = array n (sum real)
    and sumi = array n (sum ((times Complex.i) @@ imag)) in
    array n (fun i ->
      if (i = 0) then
        (* expose some common subexpressions *)
        input 0 @+
        sigma 1 ((n + 1) / 2) (fun j -> input j @+ input (n - j))
      else
        (* outputs i and n - i reuse the partial sums of index
           min i (n - i) *)
        let i' = min i (n - i) in
        if (i < n - i) then
          sumr i' @+ sumi i'
        else
          sumr i' @- sumi i') in
  if (n >= !Magic.rader_min) then
    dft_rader sign n input
  else if (n = 2) then
    computation_even
  else
    computation_odd
Chris@19 79
Chris@19 80
(* [dft_rader sign p input] builds the size-p transform for prime p using
   Rader's algorithm: inputs and twiddle factors are permuted by powers
   of a generator g (and of g^(p-2) mod p), convolved as sequences of
   length p - 1, and the convolution outputs are permuted back.

   The convolution itself is computed with forward and backward
   transforms of size p - 1 obtained by calling [dft] recursively; one
   of two variants is picked depending on !Magic.alternate_convolution.

   NOTE(review): [find_generator], [pow_mod], [suchthat], [inverse_int],
   [times], [array], [zero] and [exp] are defined outside this file
   section; their behaviour is assumed from their names and use here. *)
and dft_rader sign p input =
  (* multiply a value by 1/2 *)
  let half =
    let one_half = inverse_int 2 in
    times one_half

  (* pointwise product of a and b, scaled by 1/n *)
  and make_product n a b =
    let scale_factor = inverse_int n in
    array n (fun i -> a i @* (scale_factor @* b i)) in

  (* generates a convolution using ffts.  (all arguments are the
     same as to gen_convolution, below) *)
  let gen_convolution_by_fft n a b addtoall =
    let fft_a = dft 1 n a
    and fft_b = dft 1 n b in

    let fft_ab = make_product n fft_a fft_b
    (* addtoall is injected into element 0 only, before the backward
       transform *)
    and dc_term i = if (i = 0) then addtoall else zero in

    let fft_ab1 = array n (fun i -> fft_ab i @+ dc_term i)
    and sum = fft_a 0 in
    let conv = dft (-1) n fft_ab1 in
    (sum, conv)

  (* alternate routine for convolution.  Seems to work better for
     small sizes.  I have no idea why. *)
  and gen_convolution_by_fft_alt n a b addtoall =
    (* ap/bp combine index i with index (n - i) mod n by addition,
       am/bm by subtraction, each halved *)
    let ap = array n (fun i -> half (a i @+ a ((n - i) mod n)))
    and am = array n (fun i -> half (a i @- a ((n - i) mod n)))
    and bp = array n (fun i -> half (b i @+ b ((n - i) mod n)))
    and bm = array n (fun i -> half (b i @- b ((n - i) mod n)))
    in

    let fft_ap = dft 1 n ap
    and fft_am = dft 1 n am
    and fft_bp = dft 1 n bp
    and fft_bm = dft 1 n bm in

    let fft_abpp = make_product n fft_ap fft_bp
    and fft_abpm = make_product n fft_ap fft_bm
    and fft_abmp = make_product n fft_am fft_bp
    and fft_abmm = make_product n fft_am fft_bm
    and sum = fft_ap 0 @+ fft_am 0
    and dc_term i = if (i = 0) then addtoall else zero in

    let fft_ab1 = array n (fun i -> (fft_abpp i @+ fft_abmm i) @+ dc_term i)
    and fft_ab2 = array n (fun i -> fft_abpm i @+ fft_abmp i) in
    let conv1 = dft (-1) n fft_ab1
    and conv2 = dft (-1) n fft_ab2 in
    let conv = array n (fun i ->
      conv1 i @+ conv2 i) in
    (sum, conv)

  (* generator of assignment list assigning conv to the convolution of
     a and b, all of which are of length n.  addtoall is added to
     all of the elements of the result.  Returns (sum, convolution) pair
     where sum is the sum of the elements of a. *)

  in let gen_convolution =
    if (p <= !Magic.alternate_convolution) then
      gen_convolution_by_fft_alt
    else
      gen_convolution_by_fft

  (* fft generator for prime n = p using Rader's algorithm for
     turning the fft into a convolution, which then can be
     performed in a variety of ways *)
  in
  let g = find_generator p in
  let ginv = pow_mod g (p - 2) p in
  let input_perm = array p (fun i -> input (pow_mod g i p))
  and omega_perm = array p (fun i -> exp p (sign * (pow_mod ginv i p)))
  and output_perm = array p (fun i -> pow_mod ginv i p)
  in let (sum, conv) =
    (gen_convolution (p - 1) input_perm omega_perm (input 0))
  in array p (fun i ->
    if (i = 0) then
      input 0 @+ sum
    else
      (* invert the output permutation: look for the convolution index
         whose permuted position is i *)
      let i' = suchthat 0 (fun i' -> i = output_perm i')
      in conv i')
Chris@19 161
(* our modified version of the conjugate-pair split-radix algorithm,
   which reduces the number of multiplications by rescaling the
   sub-transforms (power-of-two n's only)

   [newsplit sign n input] returns an index -> value function for the
   size-n transform of [input].  Four mutually recursive variants are
   used internally; they share the same even / (4i + 1) / (4i - 1)
   decomposition and differ in the scaling applied to twiddles and
   outputs:
     - newsplit0  : twiddles carry s (n/4) k; this is the entry point;
     - newsplitS  : twiddles are t n (sign * k);
     - newsplitS2 : as newsplitS, with the odd part multiplied by
                    sdiv2 n k;
     - newsplitS4 : as newsplitS, with the whole output multiplied by
                    sdiv4 n k.

   NOTE(review): [sec], [real], [exp], [conj], [one] and [array] are
   defined outside this file section. *)
and newsplit sign n input =
  let rec s n k =            (* recursive scale factor *)
    if n <= 4 then
      one
    else
      let k4 = (abs k) mod (n / 4) in
      let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
      (s (n / 4) k4') @* (real (exp n k4'))

  and sinv n k =             (* 1 / s(n,k) *)
    if n <= 4 then
      one
    else
      let k4 = (abs k) mod (n / 4) in
      let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
      (sinv (n / 4) k4') @* (sec n k4')

  in let sdiv2 n k = (s n k) @* (sinv (2*n) k) (* s(n,k) / s(2*n,k) *)
  and sdiv4 n k =            (* s(n,k) / s(4*n,k) *)
    let k4 = (abs k) mod n in
    sec (4*n) (if k4 <= (n / 2) then k4 else (n - k4))

  (* rescaled twiddle factor used by the S, S2 and S4 variants *)
  in let t n k = (exp n k) @* (sdiv4 (n/4) k)

  (* base cases: sizes 1 and 2 *)
  and dft1 input = input
  and dft2 input = array 2 (fun k -> (input 0) @+ ((input 1) @* exp 2 k))

  in let rec newsplit0 sign n input =
    if (n = 1) then dft1 input
    else if (n = 2) then dft2 input
    else let u = newsplit0 sign (n / 2) (fun i -> input (i*2))
         and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
         and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n))
         and twid = array n (fun k -> s (n/4) k @* exp n (sign * k)) in
         let w = array n (fun k -> twid k @* z (k mod (n / 4)))
         and w' = array n (fun k -> conj (twid k) @* z' (k mod (n / 4))) in
         let ww = array n (fun k -> w k @+ w' k) in
         array n (fun k -> u (k mod (n / 2)) @+ ww k)

  and newsplitS sign n input =
    if (n = 1) then dft1 input
    else if (n = 2) then dft2 input
    else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
         and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
         and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
         let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
         and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
         let ww = array n (fun k -> w k @+ w' k) in
         array n (fun k -> u (k mod (n / 2)) @+ ww k)

  and newsplitS2 sign n input =
    if (n = 1) then dft1 input
    else if (n = 2) then dft2 input
    else let u = newsplitS4 sign (n / 2) (fun i -> input (i*2))
         and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
         and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
         let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
         and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
         let ww = array n (fun k -> (w k @+ w' k) @* (sdiv2 n k)) in
         array n (fun k -> u (k mod (n / 2)) @+ ww k)

  and newsplitS4 sign n input =
    if (n = 1) then dft1 input
    else if (n = 2) then
      (* parenthesised so the let-body cannot swallow the else branch *)
      (let f = dft2 input
       in array 2 (fun k -> (f k) @* (sinv 8 k)))
    else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
         and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
         and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
         let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
         and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
         let ww = array n (fun k -> w k @+ w' k) in
         array n (fun k -> (u (k mod (n / 2)) @+ ww k) @* (sdiv4 n k))

  in newsplit0 sign n input
Chris@19 240
(* [dft sign n input] is the entry point of the generator: it returns an
   index -> value function for the size-n transform of [input], choosing
   a decomposition from n and the flags in [Magic]:

     1. n listed in !Magic.rader_list          -> dft_rader
     2. choose_factor n = 1 (n is prime)       -> dft_prime
     3. r and n / r coprime                    -> prime_factor
     4. n divisible by 4 and n > 4             -> newsplit (when
        !Magic.newsplit is set and n is a power of two), otherwise
        split_radix_dif or split_radix_dit depending on
        !Magic.dif_split_radix
     5. anything else                          -> cooley_tukey

   where r = choose_factor n.  The chosen result is wrapped in [array].

   Inside the local helpers, a bare [n] refers to this function's n
   unless the helper rebinds it as its own parameter (the two
   split-radix helpers do; cooley_tukey and prime_factor do not). *)
and dft sign n input =
  (* generic n = n1 * n2 step: n2 transforms of size n1, a twiddle
     multiplication, then n1 transforms of size n2 *)
  let rec cooley_tukey sign n1 n2 input =
    let tmp1 =
      array n2 (fun i2 ->
        dft sign n1 (fun i1 -> input (i1 * n2 + i2))) in
    let tmp2 =
      array n1 (fun i1 ->
        array n2 (fun i2 ->
          exp n (sign * i1 * i2) @* tmp1 i2 i1)) in
    let tmp3 = array n1 (fun i1 -> dft sign n2 (tmp2 i1)) in
    (fun i -> tmp3 (i mod n1) (i / n1))

  (*
   * This is "exponent -1" split-radix by Dan Bernstein.
   *)
  and split_radix_dit sign n input =
    let f0 = dft sign (n / 2) (fun i -> input (i * 2))
    and f10 = dft sign (n / 4) (fun i -> input (i * 4 + 1))
    and f11 = dft sign (n / 4) (fun i -> input ((n + i * 4 - 1) mod n)) in
    let g10 = array n (fun k ->
      exp n (sign * k) @* f10 (k mod (n / 4)))
    and g11 = array n (fun k ->
      exp n (- sign * k) @* f11 (k mod (n / 4))) in
    let g1 = array n (fun k -> g10 k @+ g11 k) in
    array n (fun k -> f0 (k mod (n / 2)) @+ g1 k)

  (* split-radix with the additions and twiddles applied to the inputs:
     even outputs come from one half-size transform, odd outputs from
     quarter-size transforms selected by k mod 4 *)
  and split_radix_dif sign n input =
    let n2 = n / 2 and n4 = n / 4 in
    let x0 = array n2 (fun i -> input i @+ input (i + n2))
    and x10 = array n4 (fun i -> input i @- input (i + n2))
    and x11 = array n4 (fun i ->
      input (i + n4) @- input (i + n2 + n4)) in
    (* operators starting with '@' share one right-associative level, so
       the inner expression groups as x10 i @+ (exp 4 ... @* x11 i) *)
    let x1 k i =
      exp n (k * i * sign) @* (x10 i @+ exp 4 (k * sign) @* x11 i) in
    let f0 = dft sign n2 x0
    and f1 = array 4 (fun k -> dft sign n4 (x1 k)) in
    array n (fun k ->
      if k mod 2 = 0 then f0 (k / 2)
      else let k' = k mod 4 in f1 k' ((k - k') / 4))

  (* decomposition for coprime n1 and n2: no twiddle factors, indices
     are mapped with (i1 * n2 + i2 * n1) mod n on input and
     (i mod n1, i mod n2) on output *)
  and prime_factor sign n1 n2 input =
    let tmp1 = array n2 (fun i2 ->
      dft sign n1 (fun i1 -> input ((i1 * n2 + i2 * n1) mod n)))
    in let tmp2 = array n1 (fun i1 ->
      dft sign n2 (fun k2 -> tmp1 k2 i1))
    in fun i -> tmp2 (i mod n1) (i mod n2)

  in let algorithm sign n =
    let r = choose_factor n in
    if List.mem n !Magic.rader_list then
      (* special cases *)
      dft_rader sign n
    else if (r = 1) then (* n is prime *)
      dft_prime sign n
    else if (gcd r (n / r)) = 1 then
      prime_factor sign r (n / r)
    else if (n mod 4 = 0 && n > 4) then
      (* explicit grouping: the final else below belongs to the
         n mod 4 test, not to this inner chain *)
      begin
        if !Magic.newsplit && is_power_of_two n then
          newsplit sign n
        else if !Magic.dif_split_radix then
          split_radix_dif sign n
        else
          split_radix_dit sign n
      end
    else
      cooley_tukey sign r (n / r)
  in
  array n (algorithm sign n input)