src/fftw-3.3.8/kernel/tensor7.c @ 82:d0c2a83c1364
Add FFTW 3.3.8 source, and a Linux build
| author | Chris Cannam |
| --- | --- |
| date | Tue, 19 Nov 2019 14:52:55 +0000 |
| parents | |
| children | |
comparison: 81:7029a4916348 vs. 82:d0c2a83c1364
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */


#include "kernel/ifftw.h"

static int signof(INT x)
{
     if (x < 0) return -1;
     if (x == 0) return 0;
     /* if (x > 0) */ return 1;
}

/* total order among iodim's */
int X(dimcmp)(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
     INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os);
     INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo);

     /* in descending order of min{istride, ostride} */
     if (sam != sbm)
          return signof(sbm - sam);

     /* in case of a tie, in descending order of istride */
     if (sbi != sai)
          return signof(sbi - sai);

     /* in case of a tie, in descending order of ostride */
     if (sbo != sao)
          return signof(sbo - sao);

     /* in case of a tie, in ascending order of n */
     return signof(a->n - b->n);
}
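
/* Illustrative example (hypothetical dims, not part of the original
   source): given
       A = { n = 16, is =  1, os =  1 }   min stride  1
       B = { n =  4, is = 16, os = 16 }   min stride 16
       C = { n =  8, is = 64, os = 64 }   min stride 64
   X(dimcmp) orders them C, B, A, i.e. in descending order of
   min{|istride|, |ostride|}, with the tie-breaks listed above. */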

static void canonicalize(tensor *x)
{
     if (x->rnk > 1) {
          qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
                (int (*)(const void *, const void *))X(dimcmp));
     }
}

static int compare_by_istride(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);

     /* in descending order of istride */
     return signof(sbi - sai);
}

static tensor *really_compress(const tensor *sz)
{
     int i, rnk;
     tensor *x;

     A(FINITE_RNK(sz->rnk));
     for (i = rnk = 0; i < sz->rnk; ++i) {
          A(sz->dims[i].n > 0);
          if (sz->dims[i].n != 1)
               ++rnk;
     }

     x = X(mktensor)(rnk);
     for (i = rnk = 0; i < sz->rnk; ++i) {
          if (sz->dims[i].n != 1)
               x->dims[rnk++] = sz->dims[i];
     }
     return x;
}

/* Like tensor_copy, but eliminate n == 1 dimensions, which
   never affect any transform or transform vector.

   Also, we sort the tensor into a canonical order of decreasing
   strides (see X(dimcmp) for an exact definition).  In general,
   processing a loop/array in order of decreasing stride will improve
   locality.  Both forward and backwards traversal of the tensor are
   considered e.g. by vrank-geq1, so sorting in increasing
   vs. decreasing order is not really important. */
tensor *X(tensor_compress)(const tensor *sz)
{
     tensor *x = really_compress(sz);
     canonicalize(x);
     return x;
}
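
/* Illustrative example (hypothetical dims, not part of the original
   source): compressing
       { n = 1, is = 128, os = 128 }, { n = 8, is = 1, os = 1 },
       { n = 4, is = 8,   os = 8   }
   drops the n == 1 dimension and canonicalizes the rest, yielding
       { n = 4, is = 8, os = 8 }, { n = 8, is = 1, os = 1 }. */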

/* Return whether the strides of a and b are such that they form an
   effective contiguous 1d array.  Assumes that a.is >= b.is. */
static int strides_contig(iodim *a, iodim *b)
{
     return (a->is == b->is * b->n && a->os == b->os * b->n);
}
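
/* Illustrative example (hypothetical dims, not part of the original
   source): a = { n = 4, is = 8, os = 8 } and b = { n = 8, is = 1, os = 1 }
   satisfy a->is == b->is * b->n (8 == 1 * 8), so one step of a skips
   over exactly one full traversal of b; together the two dimensions
   address indices 0..31 contiguously and behave like a single
   { n = 32, is = 1, os = 1 }. */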

/* Like tensor_compress, but also compress into one dimension any
   group of dimensions that form a contiguous block of indices with
   some stride.  (This can safely be done for transform vector sizes.) */
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
     int i, rnk;
     tensor *sz2, *x;

     if (X(tensor_sz)(sz) == 0)
          return X(mktensor)(RNK_MINFTY);

     sz2 = really_compress(sz);
     A(FINITE_RNK(sz2->rnk));

     if (sz2->rnk <= 1) { /* nothing to compress. */
          if (0) {
               /* this call is redundant, because "sz2->rnk <= 1" implies
                  that the tensor is already canonical, but I am writing
                  it explicitly because "logically" we need to canonicalize
                  the tensor before returning. */
               canonicalize(sz2);
          }
          return sz2;
     }

     /* sort in descending order of |istride|, so that compressible
        dimensions appear contiguously */
     qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
           (int (*)(const void *, const void *))compare_by_istride);

     /* compute what the rank will be after compression */
     for (i = rnk = 1; i < sz2->rnk; ++i)
          if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
               ++rnk;

     /* merge adjacent dimensions whenever possible */
     x = X(mktensor)(rnk);
     x->dims[0] = sz2->dims[0];
     for (i = rnk = 1; i < sz2->rnk; ++i) {
          if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
               x->dims[rnk - 1].n *= sz2->dims[i].n;
               x->dims[rnk - 1].is = sz2->dims[i].is;
               x->dims[rnk - 1].os = sz2->dims[i].os;
          } else {
               A(rnk < x->rnk);
               x->dims[rnk++] = sz2->dims[i];
          }
     }

     X(tensor_destroy)(sz2);

     /* reduce to canonical form */
     canonicalize(x);
     return x;
}
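
/* Illustrative example (hypothetical dims, not part of the original
   source): starting from
       { n = 8, is = 1, os = 1 }, { n = 4, is = 8, os = 8 },
       { n = 2, is = 100, os = 100 },
   sorting by descending |istride| gives the stride order 100, 8, 1;
   the pair (is = 8, n = 4) / (is = 1, n = 8) is contiguous (8 == 1 * 8)
   and is merged into { n = 32, is = 1, os = 1 }, while the stride-100
   dimension cannot be merged (100 != 8 * 4).  The result has rank 2:
       { n = 2, is = 100, os = 100 }, { n = 32, is = 1, os = 1 }. */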

/* The inverse of X(tensor_append): splits the sz tensor into
   tensor a followed by tensor b, where a's rank is arnk. */
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
{
     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));

     *a = X(tensor_copy_sub)(sz, 0, arnk);
     *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);
}
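
/* Illustrative example (hypothetical dims, not part of the original
   source): splitting a rank-3 tensor { d0, d1, d2 } with arnk = 1
   yields *a = { d0 } and *b = { d1, d2 }, so that appending *a and *b
   with X(tensor_append) reproduces the original tensor. */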

/* TRUE if the two tensors are equal */
int X(tensor_equal)(const tensor *a, const tensor *b)
{
     if (a->rnk != b->rnk)
          return 0;

     if (FINITE_RNK(a->rnk)) {
          int i;
          for (i = 0; i < a->rnk; ++i)
               if (0
                   || a->dims[i].n != b->dims[i].n
                   || a->dims[i].is != b->dims[i].is
                   || a->dims[i].os != b->dims[i].os
                    )
                    return 0;
     }

     return 1;
}

/* TRUE if the sets of input and output locations described by
   (append sz vecsz) are the same */
int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
{
     tensor *t = X(tensor_append)(sz, vecsz);
     tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS);
     tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS);
     tensor *tic = X(tensor_compress_contiguous)(ti);
     tensor *toc = X(tensor_compress_contiguous)(to);

     int retval = X(tensor_equal)(tic, toc);

     X(tensor_destroy)(t);
     X(tensor_destroy4)(ti, to, tic, toc);

     return retval;
}
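
/* Illustrative example (hypothetical dims, not part of the original
   source): for a transform dimension { n = 4, is = 1, os = 4 } with a
   vector dimension { n = 4, is = 4, os = 1 } (an in-place transposed
   layout), the input-stride copy and the output-stride copy both
   compress to a single { n = 16, stride 1 } dimension, so the input
   and output location sets coincide and the function returns 1. */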