Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/libbench2/verify-rdft2.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 | |
22 #include "verify.h" | |
23 | |
24 /* copy real A into real B, using output stride of A and input stride of B */ | |
25 typedef struct { | |
26 dotens2_closure k; | |
27 R *ra; | |
28 R *rb; | |
29 } cpyr_closure; | |
30 | |
31 static void cpyr0(dotens2_closure *k_, | |
32 int indxa, int ondxa, int indxb, int ondxb) | |
33 { | |
34 cpyr_closure *k = (cpyr_closure *)k_; | |
35 k->rb[indxb] = k->ra[ondxa]; | |
36 UNUSED(indxa); UNUSED(ondxb); | |
37 } | |
38 | |
39 static void cpyr(R *ra, const bench_tensor *sza, | |
40 R *rb, const bench_tensor *szb) | |
41 { | |
42 cpyr_closure k; | |
43 k.k.apply = cpyr0; | |
44 k.ra = ra; k.rb = rb; | |
45 bench_dotens2(sza, szb, &k.k); | |
46 } | |
47 | |
48 /* copy unpacked halfcomplex A[n] into packed-complex B[n], using output stride | |
49 of A and input stride of B. Only copies non-redundant half; other | |
50 half must be copied via mkhermitian. */ | |
51 typedef struct { | |
52 dotens2_closure k; | |
53 int n; | |
54 int as; | |
55 int scalea; | |
56 R *ra, *ia; | |
57 R *rb, *ib; | |
58 } cpyhc2_closure; | |
59 | |
60 static void cpyhc20(dotens2_closure *k_, | |
61 int indxa, int ondxa, int indxb, int ondxb) | |
62 { | |
63 cpyhc2_closure *k = (cpyhc2_closure *)k_; | |
64 int i, n = k->n; | |
65 int scalea = k->scalea; | |
66 int as = k->as * scalea; | |
67 R *ra = k->ra + ondxa * scalea, *ia = k->ia + ondxa * scalea; | |
68 R *rb = k->rb + indxb, *ib = k->ib + indxb; | |
69 UNUSED(indxa); UNUSED(ondxb); | |
70 | |
71 for (i = 0; i < n/2 + 1; ++i) { | |
72 rb[2*i] = ra[as*i]; | |
73 ib[2*i] = ia[as*i]; | |
74 } | |
75 } | |
76 | |
77 static void cpyhc2(R *ra, R *ia, | |
78 const bench_tensor *sza, const bench_tensor *vecsza, | |
79 int scalea, | |
80 R *rb, R *ib, const bench_tensor *szb) | |
81 { | |
82 cpyhc2_closure k; | |
83 BENCH_ASSERT(sza->rnk <= 1); | |
84 k.k.apply = cpyhc20; | |
85 k.n = tensor_sz(sza); | |
86 k.scalea = scalea; | |
87 if (!FINITE_RNK(sza->rnk) || sza->rnk == 0) | |
88 k.as = 0; | |
89 else | |
90 k.as = sza->dims[0].os; | |
91 k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib; | |
92 bench_dotens2(vecsza, szb, &k.k); | |
93 } | |
94 | |
95 /* icpyhc2 is the inverse of cpyhc2 */ | |
96 | |
97 static void icpyhc20(dotens2_closure *k_, | |
98 int indxa, int ondxa, int indxb, int ondxb) | |
99 { | |
100 cpyhc2_closure *k = (cpyhc2_closure *)k_; | |
101 int i, n = k->n; | |
102 int scalea = k->scalea; | |
103 int as = k->as * scalea; | |
104 R *ra = k->ra + indxa * scalea, *ia = k->ia + indxa * scalea; | |
105 R *rb = k->rb + ondxb, *ib = k->ib + ondxb; | |
106 UNUSED(ondxa); UNUSED(indxb); | |
107 | |
108 for (i = 0; i < n/2 + 1; ++i) { | |
109 ra[as*i] = rb[2*i]; | |
110 ia[as*i] = ib[2*i]; | |
111 } | |
112 } | |
113 | |
114 static void icpyhc2(R *ra, R *ia, | |
115 const bench_tensor *sza, const bench_tensor *vecsza, | |
116 int scalea, | |
117 R *rb, R *ib, const bench_tensor *szb) | |
118 { | |
119 cpyhc2_closure k; | |
120 BENCH_ASSERT(sza->rnk <= 1); | |
121 k.k.apply = icpyhc20; | |
122 k.n = tensor_sz(sza); | |
123 k.scalea = scalea; | |
124 if (!FINITE_RNK(sza->rnk) || sza->rnk == 0) | |
125 k.as = 0; | |
126 else | |
127 k.as = sza->dims[0].is; | |
128 k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib; | |
129 bench_dotens2(vecsza, szb, &k.k); | |
130 } | |
131 | |
132 typedef struct { | |
133 dofft_closure k; | |
134 bench_problem *p; | |
135 } dofft_rdft2_closure; | |
136 | |
137 static void rdft2_apply(dofft_closure *k_, | |
138 bench_complex *in, bench_complex *out) | |
139 { | |
140 dofft_rdft2_closure *k = (dofft_rdft2_closure *)k_; | |
141 bench_problem *p = k->p; | |
142 bench_tensor *totalsz, *pckdsz, *totalsz_swap, *pckdsz_swap; | |
143 bench_tensor *probsz2, *totalsz2, *pckdsz2; | |
144 bench_tensor *probsz2_swap, *totalsz2_swap, *pckdsz2_swap; | |
145 bench_real *ri, *ii, *ro, *io; | |
146 int n2, totalscale; | |
147 | |
148 totalsz = tensor_append(p->vecsz, p->sz); | |
149 pckdsz = verify_pack(totalsz, 2); | |
150 n2 = tensor_sz(totalsz); | |
151 if (FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0) | |
152 n2 = (n2 / p->sz->dims[p->sz->rnk - 1].n) * | |
153 (p->sz->dims[p->sz->rnk - 1].n / 2 + 1); | |
154 ri = (bench_real *) p->in; | |
155 ro = (bench_real *) p->out; | |
156 | |
157 if (FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0 && n2 > 0) { | |
158 probsz2 = tensor_copy_sub(p->sz, p->sz->rnk - 1, 1); | |
159 totalsz2 = tensor_copy_sub(totalsz, 0, totalsz->rnk - 1); | |
160 pckdsz2 = tensor_copy_sub(pckdsz, 0, pckdsz->rnk - 1); | |
161 } | |
162 else { | |
163 probsz2 = mktensor(0); | |
164 totalsz2 = tensor_copy(totalsz); | |
165 pckdsz2 = tensor_copy(pckdsz); | |
166 } | |
167 | |
168 totalsz_swap = tensor_copy_swapio(totalsz); | |
169 pckdsz_swap = tensor_copy_swapio(pckdsz); | |
170 totalsz2_swap = tensor_copy_swapio(totalsz2); | |
171 pckdsz2_swap = tensor_copy_swapio(pckdsz2); | |
172 probsz2_swap = tensor_copy_swapio(probsz2); | |
173 | |
174 /* confusion: the stride is the distance between complex elements | |
175 when using interleaved format, but it is the distance between | |
176 real elements when using split format */ | |
177 if (p->split) { | |
178 ii = p->ini ? (bench_real *) p->ini : ri + n2; | |
179 io = p->outi ? (bench_real *) p->outi : ro + n2; | |
180 totalscale = 1; | |
181 } else { | |
182 ii = p->ini ? (bench_real *) p->ini : ri + 1; | |
183 io = p->outi ? (bench_real *) p->outi : ro + 1; | |
184 totalscale = 2; | |
185 } | |
186 | |
187 if (p->sign < 0) { /* R2HC */ | |
188 int N, vN, i; | |
189 cpyr(&c_re(in[0]), pckdsz, ri, totalsz); | |
190 after_problem_rcopy_from(p, ri); | |
191 doit(1, p); | |
192 after_problem_hccopy_to(p, ro, io); | |
193 if (k->k.recopy_input) | |
194 cpyr(ri, totalsz_swap, &c_re(in[0]), pckdsz_swap); | |
195 cpyhc2(ro, io, probsz2, totalsz2, totalscale, | |
196 &c_re(out[0]), &c_im(out[0]), pckdsz2); | |
197 N = tensor_sz(p->sz); | |
198 vN = tensor_sz(p->vecsz); | |
199 for (i = 0; i < vN; ++i) | |
200 mkhermitian(out + i*N, p->sz->rnk, p->sz->dims, 1); | |
201 } | |
202 else { /* HC2R */ | |
203 icpyhc2(ri, ii, probsz2, totalsz2, totalscale, | |
204 &c_re(in[0]), &c_im(in[0]), pckdsz2); | |
205 after_problem_hccopy_from(p, ri, ii); | |
206 doit(1, p); | |
207 after_problem_rcopy_to(p, ro); | |
208 if (k->k.recopy_input) | |
209 cpyhc2(ri, ii, probsz2_swap, totalsz2_swap, totalscale, | |
210 &c_re(in[0]), &c_im(in[0]), pckdsz2_swap); | |
211 mkreal(out, tensor_sz(pckdsz)); | |
212 cpyr(ro, totalsz, &c_re(out[0]), pckdsz); | |
213 } | |
214 | |
215 tensor_destroy(totalsz); | |
216 tensor_destroy(pckdsz); | |
217 tensor_destroy(totalsz_swap); | |
218 tensor_destroy(pckdsz_swap); | |
219 tensor_destroy(probsz2); | |
220 tensor_destroy(totalsz2); | |
221 tensor_destroy(pckdsz2); | |
222 tensor_destroy(probsz2_swap); | |
223 tensor_destroy(totalsz2_swap); | |
224 tensor_destroy(pckdsz2_swap); | |
225 } | |
226 | |
227 void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e) | |
228 { | |
229 C *inA, *inB, *inC, *outA, *outB, *outC, *tmp; | |
230 int n, vecn, N; | |
231 dofft_rdft2_closure k; | |
232 | |
233 BENCH_ASSERT(p->kind == PROBLEM_REAL); | |
234 | |
235 if (!FINITE_RNK(p->sz->rnk) || !FINITE_RNK(p->vecsz->rnk)) | |
236 return; /* give up */ | |
237 | |
238 k.k.apply = rdft2_apply; | |
239 k.k.recopy_input = 0; | |
240 k.p = p; | |
241 | |
242 if (rounds == 0) | |
243 rounds = 20; /* default value */ | |
244 | |
245 n = tensor_sz(p->sz); | |
246 vecn = tensor_sz(p->vecsz); | |
247 N = n * vecn; | |
248 | |
249 inA = (C *) bench_malloc(N * sizeof(C)); | |
250 inB = (C *) bench_malloc(N * sizeof(C)); | |
251 inC = (C *) bench_malloc(N * sizeof(C)); | |
252 outA = (C *) bench_malloc(N * sizeof(C)); | |
253 outB = (C *) bench_malloc(N * sizeof(C)); | |
254 outC = (C *) bench_malloc(N * sizeof(C)); | |
255 tmp = (C *) bench_malloc(N * sizeof(C)); | |
256 | |
257 e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC, | |
258 tmp, rounds, tol); | |
259 e->l = linear(&k.k, 1, N, inA, inB, inC, outA, outB, outC, | |
260 tmp, rounds, tol); | |
261 | |
262 e->s = 0.0; | |
263 if (p->sign < 0) | |
264 e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign, | |
265 inA, inB, outA, outB, | |
266 tmp, rounds, tol, TIME_SHIFT)); | |
267 else | |
268 e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign, | |
269 inA, inB, outA, outB, | |
270 tmp, rounds, tol, FREQ_SHIFT)); | |
271 | |
272 if (!p->in_place && !p->destroy_input) | |
273 preserves_input(&k.k, p->sign < 0 ? mkreal : mkhermitian1, | |
274 N, inA, inB, outB, rounds); | |
275 | |
276 bench_free(tmp); | |
277 bench_free(outC); | |
278 bench_free(outB); | |
279 bench_free(outA); | |
280 bench_free(inC); | |
281 bench_free(inB); | |
282 bench_free(inA); | |
283 } | |
284 | |
285 void accuracy_rdft2(bench_problem *p, int rounds, int impulse_rounds, | |
286 double t[6]) | |
287 { | |
288 dofft_rdft2_closure k; | |
289 int n; | |
290 C *a, *b; | |
291 | |
292 BENCH_ASSERT(p->kind == PROBLEM_REAL); | |
293 BENCH_ASSERT(p->sz->rnk == 1); | |
294 BENCH_ASSERT(p->vecsz->rnk == 0); | |
295 | |
296 k.k.apply = rdft2_apply; | |
297 k.k.recopy_input = 0; | |
298 k.p = p; | |
299 n = tensor_sz(p->sz); | |
300 | |
301 a = (C *) bench_malloc(n * sizeof(C)); | |
302 b = (C *) bench_malloc(n * sizeof(C)); | |
303 accuracy_test(&k.k, p->sign < 0 ? mkreal : mkhermitian1, p->sign, | |
304 n, a, b, rounds, impulse_rounds, t); | |
305 bench_free(b); | |
306 bench_free(a); | |
307 } |