| Chris@42 | 1 /* | 
| Chris@42 | 2  * Copyright (c) 2003, 2007-14 Matteo Frigo | 
| Chris@42 | 3  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | 
| Chris@42 | 4  * | 
| Chris@42 | 5  * This program is free software; you can redistribute it and/or modify | 
| Chris@42 | 6  * it under the terms of the GNU General Public License as published by | 
| Chris@42 | 7  * the Free Software Foundation; either version 2 of the License, or | 
| Chris@42 | 8  * (at your option) any later version. | 
| Chris@42 | 9  * | 
| Chris@42 | 10  * This program is distributed in the hope that it will be useful, | 
| Chris@42 | 11  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| Chris@42 | 12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| Chris@42 | 13  * GNU General Public License for more details. | 
| Chris@42 | 14  * | 
| Chris@42 | 15  * You should have received a copy of the GNU General Public License | 
| Chris@42 | 16  * along with this program; if not, write to the Free Software | 
| Chris@42 | 17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA | 
| Chris@42 | 18  * | 
| Chris@42 | 19  */ | 
| Chris@42 | 20 | 
| Chris@42 | 21 #include "api.h" | 
| Chris@42 | 22 #include <math.h> | 
| Chris@42 | 23 | 
/* One operand of a flag rule.  The pair (x, xm) encodes either a
   "flag" (xm == 0) or a "mask" (xm == x).  With this encoding a
   single expression covers both polarities of a bit operation:
   (flags & x) ^ xm for tests and (flags | x) ^ xm for updates. */
typedef struct {
     unsigned x;   /* the bit(s) the operand refers to */
     unsigned xm;  /* 0 for a flag, same value as x for a mask */
} flagmask;

/* One rule of a flag map: when the predicate `flag' holds for the
   input word, the operation `op' is applied to the output word. */
typedef struct {
     flagmask flag;  /* predicate, evaluated with FLAGP */
     flagmask op;    /* action, applied with OP */
} flagop;
| Chris@42 | 35 | 
/* FLAGP(word, m): nonzero iff predicate M holds for WORD.
     - M built with YES(b): some bit of b is set in WORD.
     - M built with NO(b):  some bit of b is clear in WORD.
   OP(word, m): WORD after applying operation M.
     - M built with YES(b): the bits of b are set.
     - M built with NO(b):  the bits of b are cleared. */
#define FLAGP(word, m) ((((word) & (m).x)) ^ (m).xm)
#define OP(word, m) ((((word) | (m).x)) ^ (m).xm)

/* brace initializers for a flagmask: YES is the "flag" form
   (xm == 0), NO is the "mask" form (xm == x) */
#define YES(bits) { (bits), 0 }
#define NO(bits) { (bits), (bits) }

/* brace initializer for one flagop rule */
#define IMPLIES(pred, action) { pred, action }

/* EQV and NEQV each expand to TWO comma-separated rules, so they may
   only be used inside an array initializer list:
     EQV(a, b):  a set => set b;   a clear => clear b
     NEQV(a, b): a set => clear b; a clear => set b */
#define EQV(p, q) IMPLIES(YES(p), YES(q)), IMPLIES(NO(p), NO(q))
#define NEQV(p, q) IMPLIES(YES(p), NO(q)), IMPLIES(NO(p), YES(q))
| Chris@42 | 44 | 
| Chris@42 | 45 static void map_flags(unsigned *iflags, unsigned *oflags, | 
| Chris@42 | 46 		      const flagop flagmap[], size_t nmap) | 
| Chris@42 | 47 { | 
| Chris@42 | 48      size_t i; | 
| Chris@42 | 49      for (i = 0; i < nmap; ++i) | 
| Chris@42 | 50           if (FLAGP(*iflags, flagmap[i].flag)) | 
| Chris@42 | 51                *oflags = OP(*oflags, flagmap[i].op); | 
| Chris@42 | 52 } | 
| Chris@42 | 53 | 
| Chris@42 | 54 /* encoding of the planner timelimit into a BITS_FOR_TIMELIMIT-bits | 
| Chris@42 | 55    nonnegative integer, such that we can still view the integer as | 
| Chris@42 | 56    ``impatience'': higher means *lower* time limit, and 0 is the | 
| Chris@42 | 57    highest possible value (about 1 year of calendar time) */ | 
| Chris@42 | 58 static unsigned timelimit_to_flags(double timelimit) | 
| Chris@42 | 59 { | 
| Chris@42 | 60      const double tmax = 365 * 24 * 3600; | 
| Chris@42 | 61      const double tstep = 1.05; | 
| Chris@42 | 62      const int nsteps = (1 << BITS_FOR_TIMELIMIT); | 
| Chris@42 | 63      int x; | 
| Chris@42 | 64 | 
| Chris@42 | 65      if (timelimit < 0 || timelimit >= tmax) | 
| Chris@42 | 66 	  return 0; | 
| Chris@42 | 67      if (timelimit <= 1.0e-10) | 
| Chris@42 | 68 	  return nsteps - 1; | 
| Chris@42 | 69 | 
| Chris@42 | 70      x = (int) (0.5 + (log(tmax / timelimit) / log(tstep))); | 
| Chris@42 | 71 | 
| Chris@42 | 72      if (x < 0) x = 0; | 
| Chris@42 | 73      if (x >= nsteps) x = nsteps - 1; | 
| Chris@42 | 74      return x; | 
| Chris@42 | 75 } | 
| Chris@42 | 76 | 
| Chris@42 | 77 void X(mapflags)(planner *plnr, unsigned flags) | 
| Chris@42 | 78 { | 
| Chris@42 | 79      unsigned l, u, t; | 
| Chris@42 | 80 | 
| Chris@42 | 81      /* map of api flags -> api flags, to implement consistency rules | 
| Chris@42 | 82         and combination flags */ | 
| Chris@42 | 83      const flagop self_flagmap[] = { | 
| Chris@42 | 84 	  /* in some cases (notably for halfcomplex->real transforms), | 
| Chris@42 | 85 	     DESTROY_INPUT is the default, so we need to support | 
| Chris@42 | 86 	     an inverse flag to disable it. | 
| Chris@42 | 87 | 
| Chris@42 | 88 	     (PRESERVE, DESTROY)   ->   (PRESERVE, DESTROY) | 
| Chris@42 | 89                (0, 0)                       (1, 0) | 
| Chris@42 | 90                (0, 1)                       (0, 1) | 
| Chris@42 | 91                (1, 0)                       (1, 0) | 
| Chris@42 | 92                (1, 1)                       (1, 0) | 
| Chris@42 | 93 	  */ | 
| Chris@42 | 94 	  IMPLIES(YES(FFTW_PRESERVE_INPUT), NO(FFTW_DESTROY_INPUT)), | 
| Chris@42 | 95 	  IMPLIES(NO(FFTW_DESTROY_INPUT), YES(FFTW_PRESERVE_INPUT)), | 
| Chris@42 | 96 | 
| Chris@42 | 97 	  IMPLIES(YES(FFTW_EXHAUSTIVE), YES(FFTW_PATIENT)), | 
| Chris@42 | 98 | 
| Chris@42 | 99 	  IMPLIES(YES(FFTW_ESTIMATE), NO(FFTW_PATIENT)), | 
| Chris@42 | 100 	  IMPLIES(YES(FFTW_ESTIMATE), | 
| Chris@42 | 101 		  YES(FFTW_ESTIMATE_PATIENT | 
| Chris@42 | 102 		      | FFTW_NO_INDIRECT_OP | 
| Chris@42 | 103 		      | FFTW_ALLOW_PRUNING)), | 
| Chris@42 | 104 | 
| Chris@42 | 105 	  IMPLIES(NO(FFTW_EXHAUSTIVE), | 
| Chris@42 | 106 		  YES(FFTW_NO_SLOW)), | 
| Chris@42 | 107 | 
| Chris@42 | 108 	  /* a canonical set of fftw2-like impatience flags */ | 
| Chris@42 | 109 	  IMPLIES(NO(FFTW_PATIENT), | 
| Chris@42 | 110 		  YES(FFTW_NO_VRECURSE | 
| Chris@42 | 111 		      | FFTW_NO_RANK_SPLITS | 
| Chris@42 | 112 		      | FFTW_NO_VRANK_SPLITS | 
| Chris@42 | 113 		      | FFTW_NO_NONTHREADED | 
| Chris@42 | 114 		      | FFTW_NO_DFT_R2HC | 
| Chris@42 | 115 		      | FFTW_NO_FIXED_RADIX_LARGE_N | 
| Chris@42 | 116 		      | FFTW_BELIEVE_PCOST)) | 
| Chris@42 | 117      }; | 
| Chris@42 | 118 | 
| Chris@42 | 119      /* map of (processed) api flags to internal problem/planner flags */ | 
| Chris@42 | 120      const flagop l_flagmap[] = { | 
| Chris@42 | 121 	  EQV(FFTW_PRESERVE_INPUT, NO_DESTROY_INPUT), | 
| Chris@42 | 122 	  EQV(FFTW_NO_SIMD, NO_SIMD), | 
| Chris@42 | 123 	  EQV(FFTW_CONSERVE_MEMORY, CONSERVE_MEMORY), | 
| Chris@42 | 124 	  EQV(FFTW_NO_BUFFERING, NO_BUFFERING), | 
| Chris@42 | 125 	  NEQV(FFTW_ALLOW_LARGE_GENERIC, NO_LARGE_GENERIC) | 
| Chris@42 | 126      }; | 
| Chris@42 | 127 | 
| Chris@42 | 128      const flagop u_flagmap[] = { | 
| Chris@42 | 129 	  IMPLIES(YES(FFTW_EXHAUSTIVE), NO(0xFFFFFFFF)), | 
| Chris@42 | 130 	  IMPLIES(NO(FFTW_EXHAUSTIVE), YES(NO_UGLY)), | 
| Chris@42 | 131 | 
| Chris@42 | 132 	  /* the following are undocumented, "beyond-guru" flags that | 
| Chris@42 | 133 	     require some understanding of FFTW internals */ | 
| Chris@42 | 134 	  EQV(FFTW_ESTIMATE_PATIENT, ESTIMATE), | 
| Chris@42 | 135 	  EQV(FFTW_ALLOW_PRUNING, ALLOW_PRUNING), | 
| Chris@42 | 136 	  EQV(FFTW_BELIEVE_PCOST, BELIEVE_PCOST), | 
| Chris@42 | 137 	  EQV(FFTW_NO_DFT_R2HC, NO_DFT_R2HC), | 
| Chris@42 | 138 	  EQV(FFTW_NO_NONTHREADED, NO_NONTHREADED), | 
| Chris@42 | 139 	  EQV(FFTW_NO_INDIRECT_OP, NO_INDIRECT_OP), | 
| Chris@42 | 140 	  EQV(FFTW_NO_RANK_SPLITS, NO_RANK_SPLITS), | 
| Chris@42 | 141 	  EQV(FFTW_NO_VRANK_SPLITS, NO_VRANK_SPLITS), | 
| Chris@42 | 142 	  EQV(FFTW_NO_VRECURSE, NO_VRECURSE), | 
| Chris@42 | 143 	  EQV(FFTW_NO_SLOW, NO_SLOW), | 
| Chris@42 | 144 	  EQV(FFTW_NO_FIXED_RADIX_LARGE_N, NO_FIXED_RADIX_LARGE_N) | 
| Chris@42 | 145      }; | 
| Chris@42 | 146 | 
| Chris@42 | 147      map_flags(&flags, &flags, self_flagmap, NELEM(self_flagmap)); | 
| Chris@42 | 148 | 
| Chris@42 | 149      l = u = 0; | 
| Chris@42 | 150      map_flags(&flags, &l, l_flagmap, NELEM(l_flagmap)); | 
| Chris@42 | 151      map_flags(&flags, &u, u_flagmap, NELEM(u_flagmap)); | 
| Chris@42 | 152 | 
| Chris@42 | 153      /* enforce l <= u  */ | 
| Chris@42 | 154      PLNR_L(plnr) = l; | 
| Chris@42 | 155      PLNR_U(plnr) = u | l; | 
| Chris@42 | 156 | 
| Chris@42 | 157      /* assert that the conversion didn't lose bits */ | 
| Chris@42 | 158      A(PLNR_L(plnr) == l); | 
| Chris@42 | 159      A(PLNR_U(plnr) == (u | l)); | 
| Chris@42 | 160 | 
| Chris@42 | 161      /* compute flags representation of the timelimit */ | 
| Chris@42 | 162      t = timelimit_to_flags(plnr->timelimit); | 
| Chris@42 | 163 | 
| Chris@42 | 164      PLNR_TIMELIMIT_IMPATIENCE(plnr) = t; | 
| Chris@42 | 165      A(PLNR_TIMELIMIT_IMPATIENCE(plnr) == t); | 
| Chris@42 | 166 } |