/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */


#include "ifftw.h"

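/* return the sign of x: -1, 0, or +1 */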
static int signof(INT x)
{
     if (x < 0) return -1;
     if (x == 0) return 0;
     /* if (x > 0) */ return 1;
}

/* total order among iodim's */
int X(dimcmp)(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
     INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os);
     INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo);

     /* in descending order of min{istride, ostride} */
     if (sam != sbm)
          return signof(sbm - sam);

     /* in case of a tie, in descending order of istride */
     if (sbi != sai)
          return signof(sbi - sai);

     /* in case of a tie, in descending order of ostride */
     if (sbo != sao)
          return signof(sbo - sao);

     /* in case of a tie, in ascending order of n */
     return signof(a->n - b->n);
}

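/* sort the dimensions of x into the canonical order defined by X(dimcmp) */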
static void canonicalize(tensor *x)
{
     if (x->rnk > 1) {
          qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
                (int (*)(const void *, const void *))X(dimcmp));
     }
}

static int compare_by_istride(const iodim *a, const iodim *b)
{
     INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);

     /* in descending order of istride */
     return signof(sbi - sai);
}

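/* copy sz, dropping all n == 1 dimensions; the copy is not sorted */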
static tensor *really_compress(const tensor *sz)
{
     int i, rnk;
     tensor *x;

     A(FINITE_RNK(sz->rnk));
     for (i = rnk = 0; i < sz->rnk; ++i) {
          A(sz->dims[i].n > 0);
          if (sz->dims[i].n != 1)
               ++rnk;
     }

     x = X(mktensor)(rnk);
     for (i = rnk = 0; i < sz->rnk; ++i) {
          if (sz->dims[i].n != 1)
               x->dims[rnk++] = sz->dims[i];
     }
     return x;
}

/* Like tensor_copy, but eliminate n == 1 dimensions, which
   never affect any transform or transform vector.

   Also, we sort the tensor into a canonical order of decreasing
   strides (see X(dimcmp) for an exact definition).  In general,
   processing a loop/array in order of decreasing stride will improve
   locality.  Both forward and backwards traversal of the tensor are
   considered e.g. by vrank-geq1, so sorting in increasing
   vs. decreasing order is not really important. */
tensor *X(tensor_compress)(const tensor *sz)
{
     tensor *x = really_compress(sz);
     canonicalize(x);
     return x;
}

/* Return whether the strides of a and b are such that they form an
   effective contiguous 1d array.  Assumes that a.is >= b.is. */
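/* For example, a = {n = 3, is = 10, os = 10} and b = {n = 10, is = 1, os = 1}
   satisfy this test: together they cover the same locations as the single
   dimension {n = 30, is = 1, os = 1}. */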
static int strides_contig(iodim *a, iodim *b)
{
     return (a->is == b->is * b->n && a->os == b->os * b->n);
}

/* Like tensor_compress, but also compress into one dimension any
   group of dimensions that form a contiguous block of indices with
   some stride.  (This can safely be done for transform vector sizes.) */
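/* For instance, the dimensions {n = 5, is = 6, os = 6}, {n = 1, is = 6, os = 6},
   {n = 2, is = 3, os = 3}, {n = 3, is = 1, os = 1} compress to the single
   dimension {n = 30, is = 1, os = 1}: the n == 1 dimension is dropped, and the
   remaining three merge because each stride equals the next stride times its n. */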
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
     int i, rnk;
     tensor *sz2, *x;

     if (X(tensor_sz)(sz) == 0)
          return X(mktensor)(RNK_MINFTY);

     sz2 = really_compress(sz);
     A(FINITE_RNK(sz2->rnk));

     if (sz2->rnk <= 1) { /* nothing to compress. */
          if (0) {
               /* this call is redundant, because "sz2->rnk <= 1" implies
                  that the tensor is already canonical, but I am writing
                  it explicitly because "logically" we need to canonicalize
                  the tensor before returning. */
               canonicalize(sz2);
          }
          return sz2;
     }

     /* sort in descending order of |istride|, so that compressible
        dimensions appear contiguously */
     qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
           (int (*)(const void *, const void *))compare_by_istride);

     /* compute what the rank will be after compression */
     for (i = rnk = 1; i < sz2->rnk; ++i)
          if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
               ++rnk;

     /* merge adjacent dimensions whenever possible */
     x = X(mktensor)(rnk);
     x->dims[0] = sz2->dims[0];
     for (i = rnk = 1; i < sz2->rnk; ++i) {
          if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
               x->dims[rnk - 1].n *= sz2->dims[i].n;
               x->dims[rnk - 1].is = sz2->dims[i].is;
               x->dims[rnk - 1].os = sz2->dims[i].os;
          } else {
               A(rnk < x->rnk);
               x->dims[rnk++] = sz2->dims[i];
          }
     }

     X(tensor_destroy)(sz2);

     /* reduce to canonical form */
     canonicalize(x);
     return x;
}

/* The inverse of X(tensor_append): splits the sz tensor into
   tensor a followed by tensor b, where a's rank is arnk. */
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
{
     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));

     *a = X(tensor_copy_sub)(sz, 0, arnk);
     *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);
}

/* TRUE if the two tensors are equal */
int X(tensor_equal)(const tensor *a, const tensor *b)
{
     if (a->rnk != b->rnk)
          return 0;

     if (FINITE_RNK(a->rnk)) {
          int i;
          for (i = 0; i < a->rnk; ++i)
               if (0
                   || a->dims[i].n != b->dims[i].n
                   || a->dims[i].is != b->dims[i].is
                   || a->dims[i].os != b->dims[i].os
                    )
                    return 0;
     }

     return 1;
}

/* TRUE if the sets of input and output locations described by
   (append sz vecsz) are the same */
int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
{
     tensor *t = X(tensor_append)(sz, vecsz);
     tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS);
     tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS);
     tensor *tic = X(tensor_compress_contiguous)(ti);
     tensor *toc = X(tensor_compress_contiguous)(to);

     int retval = X(tensor_equal)(tic, toc);

     X(tensor_destroy)(t);
     X(tensor_destroy4)(ti, to, tic, toc);

     return retval;
}