sv-dependency-builds: comparison of src/fftw-3.3.8/kernel/tensor7.c @ 82:d0c2a83c1364
Add FFTW 3.3.8 source, and a Linux build
| author | Chris Cannam |
|---|---|
| date | Tue, 19 Nov 2019 14:52:55 +0000 |
| parents | |
| children | |
Comparison between revisions 81:7029a4916348 and 82:d0c2a83c1364. The file is new in this changeset, so its full contents appear below.
```c
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */


#include "kernel/ifftw.h"

static int signof(INT x)
{
     if (x < 0) return -1;
     if (x == 0) return 0;
     /* if (x > 0) */ return 1;
}

/* total order among iodim's */
int X(dimcmp)(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
     INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os);
     INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo);

     /* in descending order of min{istride, ostride} */
     if (sam != sbm)
          return signof(sbm - sam);

     /* in case of a tie, in descending order of istride */
     if (sbi != sai)
          return signof(sbi - sai);

     /* in case of a tie, in descending order of ostride */
     if (sbo != sao)
          return signof(sbo - sao);

     /* in case of a tie, in ascending order of n */
     return signof(a->n - b->n);
}

static void canonicalize(tensor *x)
{
     if (x->rnk > 1) {
          qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
                (int (*)(const void *, const void *))X(dimcmp));
     }
}

static int compare_by_istride(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);

     /* in descending order of istride */
     return signof(sbi - sai);
}

static tensor *really_compress(const tensor *sz)
{
     int i, rnk;
     tensor *x;

     A(FINITE_RNK(sz->rnk));
     for (i = rnk = 0; i < sz->rnk; ++i) {
          A(sz->dims[i].n > 0);
          if (sz->dims[i].n != 1)
               ++rnk;
     }

     x = X(mktensor)(rnk);
     for (i = rnk = 0; i < sz->rnk; ++i) {
          if (sz->dims[i].n != 1)
               x->dims[rnk++] = sz->dims[i];
     }
     return x;
}

/* Like tensor_copy, but eliminate n == 1 dimensions, which
   never affect any transform or transform vector.

   Also, we sort the tensor into a canonical order of decreasing
   strides (see X(dimcmp) for an exact definition).  In general,
   processing a loop/array in order of decreasing stride will improve
   locality.  Both forward and backward traversals of the tensor are
   considered, e.g. by vrank-geq1, so sorting in increasing
   vs. decreasing order is not really important. */
tensor *X(tensor_compress)(const tensor *sz)
{
     tensor *x = really_compress(sz);
     canonicalize(x);
     return x;
}
```
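The compression and canonicalization steps above are small enough to exercise in isolation. Below is a minimal standalone sketch of the same two steps, dropping n == 1 dimensions and then sorting with the X(dimcmp) ordering, written against a local dim_sketch stand-in rather than FFTW's iodim/tensor types (ifftw.h and the X() macro are not assumed here):

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for FFTW's iodim: length n, input stride is, output stride os. */
typedef struct { long n, is, os; } dim_sketch;

static long lmin(long a, long b) { return a < b ? a : b; }
static long labs_(long x) { return x < 0 ? -x : x; }

/* The same total order as X(dimcmp): descending min{|is|,|os|},
   then descending |is|, then descending |os|, then ascending n. */
static int dimcmp_sketch(const void *pa, const void *pb)
{
     const dim_sketch *a = pa, *b = pb;
     long sai = labs_(a->is), sbi = labs_(b->is);
     long sao = labs_(a->os), sbo = labs_(b->os);
     long sam = lmin(sai, sao), sbm = lmin(sbi, sbo);
     if (sam != sbm) return (sbm > sam) - (sbm < sam);
     if (sai != sbi) return (sbi > sai) - (sbi < sai);
     if (sao != sbo) return (sbo > sao) - (sbo < sao);
     return (a->n > b->n) - (a->n < b->n);
}

int main(void)
{
     /* a rank-4 size with two trivial (n == 1) dimensions, out of order */
     dim_sketch d[] = { {4, 1, 1}, {1, 64, 64}, {8, 4, 4}, {1, 32, 32} };
     dim_sketch out[4];
     int i, rnk = 0;

     for (i = 0; i < 4; ++i)        /* really_compress: drop n == 1 dims */
          if (d[i].n != 1)
               out[rnk++] = d[i];
     qsort(out, rnk, sizeof *out, dimcmp_sketch);   /* canonicalize */

     for (i = 0; i < rnk; ++i)      /* prints n=8 is=4 os=4, then n=4 is=1 os=1 */
          printf("n=%ld is=%ld os=%ld\n", out[i].n, out[i].is, out[i].os);
     return 0;
}
```

The sort key mirrors the comparisons in X(dimcmp); the (x > y) - (x < y) idiom just avoids any overflow in the subtraction that signof would otherwise be handed.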
```c
/* Return whether the strides of a and b are such that they form an
   effective contiguous 1d array.  Assumes that a.is >= b.is. */
static int strides_contig(iodim *a, iodim *b)
{
     return (a->is == b->is * b->n && a->os == b->os * b->n);
}
```
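For intuition: in a contiguous row-major 3x4 array, the outer dimension has stride 4 and length 3 while the inner dimension has stride 1 and length 4, so the outer stride equals the inner stride times the inner length on both the input and output sides. A quick standalone check, again with a hypothetical stand-in type rather than the real iodim:

```c
#include <assert.h>
#include <stdio.h>

typedef struct { long n, is, os; } dim_sketch;

static int strides_contig_sketch(const dim_sketch *a, const dim_sketch *b)
{
     /* same test as strides_contig above */
     return a->is == b->is * b->n && a->os == b->os * b->n;
}

int main(void)
{
     /* contiguous row-major 3x4: outer stride 4, inner stride 1 */
     dim_sketch outer = { 3, 4, 4 }, inner = { 4, 1, 1 };
     assert(strides_contig_sketch(&outer, &inner));  /* mergeable into n = 12 */

     /* padded rows (outer stride 5 > 4): not an effective 1d array */
     dim_sketch padded = { 3, 5, 5 };
     assert(!strides_contig_sketch(&padded, &inner));
     printf("ok\n");
     return 0;
}
```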
```c
/* Like tensor_compress, but also compress into one dimension any
   group of dimensions that form a contiguous block of indices with
   some stride.  (This can safely be done for transform vector sizes.) */
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
     int i, rnk;
     tensor *sz2, *x;

     if (X(tensor_sz)(sz) == 0)
          return X(mktensor)(RNK_MINFTY);

     sz2 = really_compress(sz);
     A(FINITE_RNK(sz2->rnk));

     if (sz2->rnk <= 1) { /* nothing to compress. */
          if (0) {
               /* this call is redundant, because "sz->rnk <= 1" implies
                  that the tensor is already canonical, but I am writing
                  it explicitly because "logically" we need to canonicalize
                  the tensor before returning. */
               canonicalize(sz2);
          }
          return sz2;
     }

     /* sort in descending order of |istride|, so that compressible
        dimensions appear contiguously */
     qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
           (int (*)(const void *, const void *))compare_by_istride);

     /* compute what the rank will be after compression */
     for (i = rnk = 1; i < sz2->rnk; ++i)
          if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
               ++rnk;

     /* merge adjacent dimensions whenever possible */
     x = X(mktensor)(rnk);
     x->dims[0] = sz2->dims[0];
     for (i = rnk = 1; i < sz2->rnk; ++i) {
          if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
               x->dims[rnk - 1].n *= sz2->dims[i].n;
               x->dims[rnk - 1].is = sz2->dims[i].is;
               x->dims[rnk - 1].os = sz2->dims[i].os;
          } else {
               A(rnk < x->rnk);
               x->dims[rnk++] = sz2->dims[i];
          }
     }

     X(tensor_destroy)(sz2);

     /* reduce to canonical form */
     canonicalize(x);
     return x;
}
```
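Putting the pieces together: once the dimensions are sorted by descending |istride|, the merge loop collapses each contiguous run into a single dimension that keeps the innermost strides and the product of the lengths. The sketch below replays that loop on a contiguous 2x3x4 block (stand-in types and names, not FFTW API) and reduces rank 3 to rank 1 with n = 24 and unit strides:

```c
#include <stdio.h>

typedef struct { long n, is, os; } dim_sketch;

/* same test as strides_contig above */
static int contig(const dim_sketch *a, const dim_sketch *b)
{
     return a->is == b->is * b->n && a->os == b->os * b->n;
}

int main(void)
{
     /* a contiguous 2x3x4 block, already in descending |istride| order */
     dim_sketch d[] = { {2, 12, 12}, {3, 4, 4}, {4, 1, 1} };
     dim_sketch x[3];
     int i, rnk = 1, n = 3;

     x[0] = d[0];
     for (i = 1; i < n; ++i) {
          if (contig(&d[i - 1], &d[i])) {
               /* merge: multiply lengths, keep the inner strides */
               x[rnk - 1].n *= d[i].n;
               x[rnk - 1].is = d[i].is;
               x[rnk - 1].os = d[i].os;
          } else {
               x[rnk++] = d[i];
          }
     }

     /* prints: rank 1: n=24 is=1 os=1 */
     printf("rank %d:", rnk);
     for (i = 0; i < rnk; ++i)
          printf(" n=%ld is=%ld os=%ld", x[i].n, x[i].is, x[i].os);
     printf("\n");
     return 0;
}
```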
```c
/* The inverse of X(tensor_append): splits the sz tensor into
   tensor a followed by tensor b, where a's rank is arnk. */
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
{
     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));

     *a = X(tensor_copy_sub)(sz, 0, arnk);
     *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);
}

/* TRUE if the two tensors are equal */
int X(tensor_equal)(const tensor *a, const tensor *b)
{
     if (a->rnk != b->rnk)
          return 0;

     if (FINITE_RNK(a->rnk)) {
          int i;
          for (i = 0; i < a->rnk; ++i)
               if (0
                   || a->dims[i].n != b->dims[i].n
                   || a->dims[i].is != b->dims[i].is
                   || a->dims[i].os != b->dims[i].os
                    )
                    return 0;
     }

     return 1;
}

/* TRUE if the sets of input and output locations described by
   (append sz vecsz) are the same */
int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
{
     tensor *t = X(tensor_append)(sz, vecsz);
     tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS);
     tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS);
     tensor *tic = X(tensor_compress_contiguous)(ti);
     tensor *toc = X(tensor_compress_contiguous)(to);

     int retval = X(tensor_equal)(tic, toc);

     X(tensor_destroy)(t);
     X(tensor_destroy4)(ti, to, tic, toc);

     return retval;
}
```
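X(tensor_inplace_locations) answers its question symbolically: it rewrites both strides of every dimension to the input strides (INPLACE_IS) and, separately, to the output strides (INPLACE_OS), compresses both copies, and tests them for equality. What that equality certifies can be checked by brute force on a small case. The sketch below enumerates every index of an in-place 2x3 transpose and compares the sorted sets of input and output offsets; all names here are illustrative stand-ins, not the FFTW implementation:

```c
#include <stdio.h>
#include <stdlib.h>

typedef struct { long n, is, os; } dim_sketch;

static int cmp_long(const void *a, const void *b)
{
     long x = *(const long *)a, y = *(const long *)b;
     return (x > y) - (x < y);
}

int main(void)
{
     /* an in-place 2x3 transpose: reads at i*3 + j, writes at i + j*2 */
     dim_sketch d[] = { {2, 3, 1}, {3, 1, 2} };
     long in[6], out[6];
     int i, j, k = 0, same = 1;

     for (i = 0; i < d[0].n; ++i)
          for (j = 0; j < d[1].n; ++j, ++k) {
               in[k]  = i * d[0].is + j * d[1].is;
               out[k] = i * d[0].os + j * d[1].os;
          }
     qsort(in, 6, sizeof *in, cmp_long);
     qsort(out, 6, sizeof *out, cmp_long);
     for (k = 0; k < 6; ++k)
          same &= (in[k] == out[k]);

     /* prints "same locations": both sides cover offsets 0..5 */
     puts(same ? "same locations" : "different locations");
     return 0;
}
```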
