annotate src/fftw-3.3.5/kernel/tensor7.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */


#include "ifftw.h"

static int signof(INT x)
{
     if (x < 0) return -1;
     if (x == 0) return 0;
     /* if (x > 0) */ return 1;
}

/* total order among iodim's */
int X(dimcmp)(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
     INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os);
     INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo);

     /* in descending order of min{istride, ostride} */
     if (sam != sbm)
          return signof(sbm - sam);

     /* in case of a tie, in descending order of istride */
     if (sbi != sai)
          return signof(sbi - sai);

     /* in case of a tie, in descending order of ostride */
     if (sbo != sao)
          return signof(sbo - sao);

     /* in case of a tie, in ascending order of n */
     return signof(a->n - b->n);
}
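
/* A minimal sketch of the ordering (editor's addition, guarded out so
   it does not enter the build; it uses only the iodim type from
   ifftw.h and X(dimcmp) above): a dimension whose smaller stride is
   larger sorts first, so canonical order is decreasing stride. */
#if 0
static void dimcmp_example(void)
{
     iodim a = { 4, 8, 8 };       /* n = 4, is = os = 8 */
     iodim b = { 8, 1, 1 };       /* n = 8, is = os = 1 */
     /* min{8,8} = 8 > min{1,1} = 1, so a precedes b */
     A(X(dimcmp)(&a, &b) < 0);
}
#endif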

static void canonicalize(tensor *x)
{
     if (x->rnk > 1) {
          qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
                (int (*)(const void *, const void *))X(dimcmp));
     }
}

static int compare_by_istride(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);

     /* in descending order of istride */
     return signof(sbi - sai);
}

static tensor *really_compress(const tensor *sz)
{
     int i, rnk;
     tensor *x;

     A(FINITE_RNK(sz->rnk));
     for (i = rnk = 0; i < sz->rnk; ++i) {
          A(sz->dims[i].n > 0);
          if (sz->dims[i].n != 1)
               ++rnk;
     }

     x = X(mktensor)(rnk);
     for (i = rnk = 0; i < sz->rnk; ++i) {
          if (sz->dims[i].n != 1)
               x->dims[rnk++] = sz->dims[i];
     }
     return x;
}

/* Like tensor_copy, but eliminate n == 1 dimensions, which
   never affect any transform or transform vector.

   Also, we sort the tensor into a canonical order of decreasing
   strides (see X(dimcmp) for an exact definition).  In general,
   processing a loop/array in order of decreasing stride will improve
   locality.  Both forward and backward traversals of the tensor are
   considered, e.g. by vrank-geq1, so sorting in increasing
   vs. decreasing order is not really important. */
tensor *X(tensor_compress)(const tensor *sz)
{
     tensor *x = really_compress(sz);
     canonicalize(x);
     return x;
}
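
/* Editor's sketch (guarded out; X(mktensor_3d) is an assumption taken
   from the kernel's tensor constructors): compressing a rank-3 tensor
   drops the n == 1 dimension and sorts the remaining two into
   decreasing-stride order. */
#if 0
static void tensor_compress_example(void)
{
     /* dims: (n=1, is=os=64), (n=4, is=os=1), (n=8, is=os=4) */
     tensor *sz = X(mktensor_3d)(1, 64, 64,  4, 1, 1,  8, 4, 4);
     tensor *c = X(tensor_compress)(sz);
     A(c->rnk == 2);
     A(c->dims[0].n == 8 && c->dims[0].is == 4);  /* larger stride first */
     A(c->dims[1].n == 4 && c->dims[1].is == 1);
     X(tensor_destroy)(c);
     X(tensor_destroy)(sz);
}
#endif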

/* Return whether the strides of a and b are such that they form an
   effective contiguous 1d array.  Assumes that a.is >= b.is. */
static int strides_contig(iodim *a, iodim *b)
{
     return (a->is == b->is * b->n && a->os == b->os * b->n);
}
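
/* Editor's worked example (guarded out): in a row-major 3 x 4 block
   the outer dimension has stride 4 and the inner has stride 1, so
   a->is == b->is * b->n holds (4 == 1 * 4) on both strides, and the
   pair covers the same indices as one length-12, stride-1 dimension. */
#if 0
static void strides_contig_example(void)
{
     iodim outer = { 3, 4, 4 };
     iodim inner = { 4, 1, 1 };
     A(strides_contig(&outer, &inner));
}
#endif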

/* Like tensor_compress, but also compress into one dimension any
   group of dimensions that form a contiguous block of indices with
   some stride.  (This can safely be done for transform vector sizes.) */
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
     int i, rnk;
     tensor *sz2, *x;

     if (X(tensor_sz)(sz) == 0)
          return X(mktensor)(RNK_MINFTY);

     sz2 = really_compress(sz);
     A(FINITE_RNK(sz2->rnk));

     if (sz2->rnk <= 1) { /* nothing to compress. */
          if (0) {
               /* this call is redundant, because "sz->rnk <= 1" implies
                  that the tensor is already canonical, but I am writing
                  it explicitly because "logically" we need to canonicalize
                  the tensor before returning. */
               canonicalize(sz2);
          }
          return sz2;
     }

     /* sort in descending order of |istride|, so that compressible
        dimensions appear contiguously */
     qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
           (int (*)(const void *, const void *))compare_by_istride);

     /* compute what the rank will be after compression */
     for (i = rnk = 1; i < sz2->rnk; ++i)
          if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
               ++rnk;

     /* merge adjacent dimensions whenever possible */
     x = X(mktensor)(rnk);
     x->dims[0] = sz2->dims[0];
     for (i = rnk = 1; i < sz2->rnk; ++i) {
          if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
               x->dims[rnk - 1].n *= sz2->dims[i].n;
               x->dims[rnk - 1].is = sz2->dims[i].is;
               x->dims[rnk - 1].os = sz2->dims[i].os;
          } else {
               A(rnk < x->rnk);
               x->dims[rnk++] = sz2->dims[i];
          }
     }

     X(tensor_destroy)(sz2);

     /* reduce to canonical form */
     canonicalize(x);
     return x;
}
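
/* Editor's sketch (guarded out; X(mktensor_2d) is an assumption taken
   from the kernel's tensor constructors): the row-major 3 x 4 shape
   from the strides_contig example merges into a single dimension of
   size 12 and stride 1. */
#if 0
static void compress_contiguous_example(void)
{
     tensor *sz = X(mktensor_2d)(3, 4, 4,  4, 1, 1);
     tensor *c = X(tensor_compress_contiguous)(sz);
     A(c->rnk == 1);
     A(c->dims[0].n == 12 && c->dims[0].is == 1 && c->dims[0].os == 1);
     X(tensor_destroy)(c);
     X(tensor_destroy)(sz);
}
#endif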

/* The inverse of X(tensor_append): splits the sz tensor into
   tensor a followed by tensor b, where a's rank is arnk. */
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
{
     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));

     *a = X(tensor_copy_sub)(sz, 0, arnk);
     *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);
}
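
/* Editor's sketch (guarded out; X(mktensor_2d) and X(tensor_destroy2)
   are assumptions from the surrounding kernel API): splitting a
   rank-2 tensor at arnk = 1 gives two rank-1 halves whose
   X(tensor_append) would rebuild the original. */
#if 0
static void tensor_split_example(void)
{
     tensor *sz = X(mktensor_2d)(3, 4, 4,  4, 1, 1);
     tensor *a, *b;
     X(tensor_split)(sz, &a, 1, &b);
     A(a->rnk == 1 && a->dims[0].n == 3);
     A(b->rnk == 1 && b->dims[0].n == 4);
     X(tensor_destroy2)(a, b);
     X(tensor_destroy)(sz);
}
#endif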

/* TRUE if the two tensors are equal */
int X(tensor_equal)(const tensor *a, const tensor *b)
{
     if (a->rnk != b->rnk)
          return 0;

     if (FINITE_RNK(a->rnk)) {
          int i;
          for (i = 0; i < a->rnk; ++i)
               if (0
                   || a->dims[i].n != b->dims[i].n
                   || a->dims[i].is != b->dims[i].is
                   || a->dims[i].os != b->dims[i].os
                    )
                    return 0;
     }

     return 1;
}

/* TRUE if the sets of input and output locations described by
   (append sz vecsz) are the same */
int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
{
     tensor *t = X(tensor_append)(sz, vecsz);
     tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS);
     tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS);
     tensor *tic = X(tensor_compress_contiguous)(ti);
     tensor *toc = X(tensor_compress_contiguous)(to);

     int retval = X(tensor_equal)(tic, toc);

     X(tensor_destroy)(t);
     X(tensor_destroy4)(ti, to, tic, toc);

     return retval;
}
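
/* Editor's sketch (guarded out; X(mktensor_1d) and X(tensor_destroy2)
   are assumptions from the surrounding kernel API): a 4 x 4 in-place
   "transposed" layout, input strides (4, 1) versus output strides
   (1, 4), touches the same 16 locations on input and output, so the
   predicate holds even though is != os in each dimension. */
#if 0
static void inplace_locations_example(void)
{
     tensor *sz = X(mktensor_1d)(4, 4, 1);
     tensor *vecsz = X(mktensor_1d)(4, 1, 4);
     A(X(tensor_inplace_locations)(sz, vecsz));
     X(tensor_destroy2)(sz, vecsz);
}
#endif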