Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/libbench2/verify-dft.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 | |
22 #include "verify.h" | |
23 | |
24 /* copy A into B, using output stride of A and input stride of B */ | |
25 typedef struct { | |
26 dotens2_closure k; | |
27 R *ra; R *ia; | |
28 R *rb; R *ib; | |
29 int scalea, scaleb; | |
30 } cpy_closure; | |
31 | |
32 static void cpy0(dotens2_closure *k_, | |
33 int indxa, int ondxa, int indxb, int ondxb) | |
34 { | |
35 cpy_closure *k = (cpy_closure *)k_; | |
36 k->rb[indxb * k->scaleb] = k->ra[ondxa * k->scalea]; | |
37 k->ib[indxb * k->scaleb] = k->ia[ondxa * k->scalea]; | |
38 UNUSED(indxa); UNUSED(ondxb); | |
39 } | |
40 | |
41 static void cpy(R *ra, R *ia, const bench_tensor *sza, int scalea, | |
42 R *rb, R *ib, const bench_tensor *szb, int scaleb) | |
43 { | |
44 cpy_closure k; | |
45 k.k.apply = cpy0; | |
46 k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib; | |
47 k.scalea = scalea; k.scaleb = scaleb; | |
48 bench_dotens2(sza, szb, &k.k); | |
49 } | |
50 | |
51 typedef struct { | |
52 dofft_closure k; | |
53 bench_problem *p; | |
54 } dofft_dft_closure; | |
55 | |
56 static void dft_apply(dofft_closure *k_, bench_complex *in, bench_complex *out) | |
57 { | |
58 dofft_dft_closure *k = (dofft_dft_closure *)k_; | |
59 bench_problem *p = k->p; | |
60 bench_tensor *totalsz, *pckdsz; | |
61 bench_tensor *totalsz_swap, *pckdsz_swap; | |
62 bench_real *ri, *ii, *ro, *io; | |
63 int totalscale; | |
64 | |
65 totalsz = tensor_append(p->vecsz, p->sz); | |
66 pckdsz = verify_pack(totalsz, 2); | |
67 ri = (bench_real *) p->in; | |
68 ro = (bench_real *) p->out; | |
69 | |
70 totalsz_swap = tensor_copy_swapio(totalsz); | |
71 pckdsz_swap = tensor_copy_swapio(pckdsz); | |
72 | |
73 /* confusion: the stride is the distance between complex elements | |
74 when using interleaved format, but it is the distance between | |
75 real elements when using split format */ | |
76 if (p->split) { | |
77 ii = p->ini ? (bench_real *) p->ini : ri + p->iphyssz; | |
78 io = p->outi ? (bench_real *) p->outi : ro + p->ophyssz; | |
79 totalscale = 1; | |
80 } else { | |
81 ii = p->ini ? (bench_real *) p->ini : ri + 1; | |
82 io = p->outi ? (bench_real *) p->outi : ro + 1; | |
83 totalscale = 2; | |
84 } | |
85 | |
86 cpy(&c_re(in[0]), &c_im(in[0]), pckdsz, 1, | |
87 ri, ii, totalsz, totalscale); | |
88 after_problem_ccopy_from(p, ri, ii); | |
89 doit(1, p); | |
90 after_problem_ccopy_to(p, ro, io); | |
91 if (k->k.recopy_input) | |
92 cpy(ri, ii, totalsz_swap, totalscale, | |
93 &c_re(in[0]), &c_im(in[0]), pckdsz_swap, 1); | |
94 cpy(ro, io, totalsz, totalscale, | |
95 &c_re(out[0]), &c_im(out[0]), pckdsz, 1); | |
96 | |
97 tensor_destroy(totalsz); | |
98 tensor_destroy(pckdsz); | |
99 tensor_destroy(totalsz_swap); | |
100 tensor_destroy(pckdsz_swap); | |
101 } | |
102 | |
103 void verify_dft(bench_problem *p, int rounds, double tol, errors *e) | |
104 { | |
105 C *inA, *inB, *inC, *outA, *outB, *outC, *tmp; | |
106 int n, vecn, N; | |
107 dofft_dft_closure k; | |
108 | |
109 BENCH_ASSERT(p->kind == PROBLEM_COMPLEX); | |
110 | |
111 k.k.apply = dft_apply; | |
112 k.k.recopy_input = 0; | |
113 k.p = p; | |
114 | |
115 if (rounds == 0) | |
116 rounds = 20; /* default value */ | |
117 | |
118 n = tensor_sz(p->sz); | |
119 vecn = tensor_sz(p->vecsz); | |
120 N = n * vecn; | |
121 | |
122 inA = (C *) bench_malloc(N * sizeof(C)); | |
123 inB = (C *) bench_malloc(N * sizeof(C)); | |
124 inC = (C *) bench_malloc(N * sizeof(C)); | |
125 outA = (C *) bench_malloc(N * sizeof(C)); | |
126 outB = (C *) bench_malloc(N * sizeof(C)); | |
127 outC = (C *) bench_malloc(N * sizeof(C)); | |
128 tmp = (C *) bench_malloc(N * sizeof(C)); | |
129 | |
130 e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC, | |
131 tmp, rounds, tol); | |
132 e->l = linear(&k.k, 0, N, inA, inB, inC, outA, outB, outC, | |
133 tmp, rounds, tol); | |
134 | |
135 e->s = 0.0; | |
136 e->s = dmax(e->s, tf_shift(&k.k, 0, p->sz, n, vecn, p->sign, | |
137 inA, inB, outA, outB, | |
138 tmp, rounds, tol, TIME_SHIFT)); | |
139 e->s = dmax(e->s, tf_shift(&k.k, 0, p->sz, n, vecn, p->sign, | |
140 inA, inB, outA, outB, | |
141 tmp, rounds, tol, FREQ_SHIFT)); | |
142 | |
143 if (!p->in_place && !p->destroy_input) | |
144 preserves_input(&k.k, 0, N, inA, inB, outB, rounds); | |
145 | |
146 bench_free(tmp); | |
147 bench_free(outC); | |
148 bench_free(outB); | |
149 bench_free(outA); | |
150 bench_free(inC); | |
151 bench_free(inB); | |
152 bench_free(inA); | |
153 } | |
154 | |
155 | |
156 void accuracy_dft(bench_problem *p, int rounds, int impulse_rounds, | |
157 double t[6]) | |
158 { | |
159 dofft_dft_closure k; | |
160 int n; | |
161 C *a, *b; | |
162 | |
163 BENCH_ASSERT(p->kind == PROBLEM_COMPLEX); | |
164 BENCH_ASSERT(p->sz->rnk == 1); | |
165 BENCH_ASSERT(p->vecsz->rnk == 0); | |
166 | |
167 k.k.apply = dft_apply; | |
168 k.k.recopy_input = 0; | |
169 k.p = p; | |
170 n = tensor_sz(p->sz); | |
171 | |
172 a = (C *) bench_malloc(n * sizeof(C)); | |
173 b = (C *) bench_malloc(n * sizeof(C)); | |
174 accuracy_test(&k.k, 0, p->sign, n, a, b, rounds, impulse_rounds, t); | |
175 bench_free(b); | |
176 bench_free(a); | |
177 } |