annotate src/fftw-3.3.8/kernel/tensor7.c @ 168:ceec0dd9ec9c

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 07 Feb 2020 11:51:13 +0000
parents bd3cc4d1df30
children
rev   line source
cannam@167 1 /*
cannam@167 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
cannam@167 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
cannam@167 4 *
cannam@167 5 * This program is free software; you can redistribute it and/or modify
cannam@167 6 * it under the terms of the GNU General Public License as published by
cannam@167 7 * the Free Software Foundation; either version 2 of the License, or
cannam@167 8 * (at your option) any later version.
cannam@167 9 *
cannam@167 10 * This program is distributed in the hope that it will be useful,
cannam@167 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@167 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@167 13 * GNU General Public License for more details.
cannam@167 14 *
cannam@167 15 * You should have received a copy of the GNU General Public License
cannam@167 16 * along with this program; if not, write to the Free Software
cannam@167 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cannam@167 18 *
cannam@167 19 */
cannam@167 20
cannam@167 21
cannam@167 22 #include "kernel/ifftw.h"
cannam@167 23
cannam@167 24 static int signof(INT x)
cannam@167 25 {
cannam@167 26 if (x < 0) return -1;
cannam@167 27 if (x == 0) return 0;
cannam@167 28 /* if (x > 0) */ return 1;
cannam@167 29 }
cannam@167 30
cannam@167 31 /* total order among iodim's */
cannam@167 32 int X(dimcmp)(const iodim *a, const iodim *b)
cannam@167 33 {
cannam@167 34 INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
cannam@167 35 INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os);
cannam@167 36 INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo);
cannam@167 37
cannam@167 38 /* in descending order of min{istride, ostride} */
cannam@167 39 if (sam != sbm)
cannam@167 40 return signof(sbm - sam);
cannam@167 41
cannam@167 42 /* in case of a tie, in descending order of istride */
cannam@167 43 if (sbi != sai)
cannam@167 44 return signof(sbi - sai);
cannam@167 45
cannam@167 46 /* in case of a tie, in descending order of ostride */
cannam@167 47 if (sbo != sao)
cannam@167 48 return signof(sbo - sao);
cannam@167 49
cannam@167 50 /* in case of a tie, in ascending order of n */
cannam@167 51 return signof(a->n - b->n);
cannam@167 52 }
cannam@167 53
cannam@167 54 static void canonicalize(tensor *x)
cannam@167 55 {
cannam@167 56 if (x->rnk > 1) {
cannam@167 57 qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
cannam@167 58 (int (*)(const void *, const void *))X(dimcmp));
cannam@167 59 }
cannam@167 60 }
cannam@167 61
cannam@167 62 static int compare_by_istride(const iodim *a, const iodim *b)
cannam@167 63 {
cannam@167 64 INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
cannam@167 65
cannam@167 66 /* in descending order of istride */
cannam@167 67 return signof(sbi - sai);
cannam@167 68 }
cannam@167 69
cannam@167 70 static tensor *really_compress(const tensor *sz)
cannam@167 71 {
cannam@167 72 int i, rnk;
cannam@167 73 tensor *x;
cannam@167 74
cannam@167 75 A(FINITE_RNK(sz->rnk));
cannam@167 76 for (i = rnk = 0; i < sz->rnk; ++i) {
cannam@167 77 A(sz->dims[i].n > 0);
cannam@167 78 if (sz->dims[i].n != 1)
cannam@167 79 ++rnk;
cannam@167 80 }
cannam@167 81
cannam@167 82 x = X(mktensor)(rnk);
cannam@167 83 for (i = rnk = 0; i < sz->rnk; ++i) {
cannam@167 84 if (sz->dims[i].n != 1)
cannam@167 85 x->dims[rnk++] = sz->dims[i];
cannam@167 86 }
cannam@167 87 return x;
cannam@167 88 }
cannam@167 89
cannam@167 90 /* Like tensor_copy, but eliminate n == 1 dimensions, which
cannam@167 91 never affect any transform or transform vector.
cannam@167 92
cannam@167 93 Also, we sort the tensor into a canonical order of decreasing
cannam@167 94 strides (see X(dimcmp) for an exact definition). In general,
cannam@167 95 processing a loop/array in order of decreasing stride will improve
cannam@167 96 locality. Both forward and backwards traversal of the tensor are
cannam@167 97 considered e.g. by vrank-geq1, so sorting in increasing
cannam@167 98 vs. decreasing order is not really important. */
cannam@167 99 tensor *X(tensor_compress)(const tensor *sz)
cannam@167 100 {
cannam@167 101 tensor *x = really_compress(sz);
cannam@167 102 canonicalize(x);
cannam@167 103 return x;
cannam@167 104 }
cannam@167 105
cannam@167 106 /* Return whether the strides of a and b are such that they form an
cannam@167 107 effective contiguous 1d array. Assumes that a.is >= b.is. */
cannam@167 108 static int strides_contig(iodim *a, iodim *b)
cannam@167 109 {
cannam@167 110 return (a->is == b->is * b->n && a->os == b->os * b->n);
cannam@167 111 }
cannam@167 112
/* Like tensor_compress, but also compress into one dimension any
   group of dimensions that form a contiguous block of indices with
   some stride.  (This can safely be done for transform vector sizes.) */
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
     int i, rnk;
     tensor *sz2, *x;

     /* an empty tensor (total size 0) compresses to rank -infinity */
     if (X(tensor_sz)(sz) == 0)
	  return X(mktensor)(RNK_MINFTY);

     /* drop all n == 1 dimensions first */
     sz2 = really_compress(sz);
     A(FINITE_RNK(sz2->rnk));

     if (sz2->rnk <= 1) { /* nothing to compress. */
	  if (0) {
	       /* this call is redundant, because "sz->rnk <= 1" implies
		  that the tensor is already canonical, but I am writing
		  it explicitly because "logically" we need to canonicalize
		  the tensor before returning. */
	       canonicalize(sz2);
	  }
	  return sz2;
     }

     /* sort in descending order of |istride|, so that compressible
	dimensions appear contigously */
     qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
	   (int (*)(const void *, const void *))compare_by_istride);

     /* compute what the rank will be after compression: each pair of
	adjacent dimensions that is stride-contiguous merges into one */
     for (i = rnk = 1; i < sz2->rnk; ++i)
	  if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
	       ++rnk;

     /* merge adjacent dimensions whenever possible */
     x = X(mktensor)(rnk);
     x->dims[0] = sz2->dims[0];
     for (i = rnk = 1; i < sz2->rnk; ++i) {
	  if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
	       /* fold dims[i] into the current merged dimension: the
		  merged length multiplies, and the merged strides become
		  the smaller (innermost) strides of dims[i] */
	       x->dims[rnk - 1].n *= sz2->dims[i].n;
	       x->dims[rnk - 1].is = sz2->dims[i].is;
	       x->dims[rnk - 1].os = sz2->dims[i].os;
	  } else {
	       A(rnk < x->rnk);
	       x->dims[rnk++] = sz2->dims[i];
	  }
     }

     /* sz2 was a temporary working copy */
     X(tensor_destroy)(sz2);

     /* reduce to canonical form */
     canonicalize(x);
     return x;
}
cannam@167 168
cannam@167 169 /* The inverse of X(tensor_append): splits the sz tensor into
cannam@167 170 tensor a followed by tensor b, where a's rank is arnk. */
cannam@167 171 void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
cannam@167 172 {
cannam@167 173 A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));
cannam@167 174
cannam@167 175 *a = X(tensor_copy_sub)(sz, 0, arnk);
cannam@167 176 *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);
cannam@167 177 }
cannam@167 178
cannam@167 179 /* TRUE if the two tensors are equal */
cannam@167 180 int X(tensor_equal)(const tensor *a, const tensor *b)
cannam@167 181 {
cannam@167 182 if (a->rnk != b->rnk)
cannam@167 183 return 0;
cannam@167 184
cannam@167 185 if (FINITE_RNK(a->rnk)) {
cannam@167 186 int i;
cannam@167 187 for (i = 0; i < a->rnk; ++i)
cannam@167 188 if (0
cannam@167 189 || a->dims[i].n != b->dims[i].n
cannam@167 190 || a->dims[i].is != b->dims[i].is
cannam@167 191 || a->dims[i].os != b->dims[i].os
cannam@167 192 )
cannam@167 193 return 0;
cannam@167 194 }
cannam@167 195
cannam@167 196 return 1;
cannam@167 197 }
cannam@167 198
cannam@167 199 /* TRUE if the sets of input and output locations described by
cannam@167 200 (append sz vecsz) are the same */
cannam@167 201 int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
cannam@167 202 {
cannam@167 203 tensor *t = X(tensor_append)(sz, vecsz);
cannam@167 204 tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS);
cannam@167 205 tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS);
cannam@167 206 tensor *tic = X(tensor_compress_contiguous)(ti);
cannam@167 207 tensor *toc = X(tensor_compress_contiguous)(to);
cannam@167 208
cannam@167 209 int retval = X(tensor_equal)(tic, toc);
cannam@167 210
cannam@167 211 X(tensor_destroy)(t);
cannam@167 212 X(tensor_destroy4)(ti, to, tic, toc);
cannam@167 213
cannam@167 214 return retval;
cannam@167 215 }