annotate ext/clapack/src/iparmq.c @ 214:abaa17f7dec5 msvc

Debug build bits
author Chris Cannam
date Tue, 06 Feb 2018 21:12:03 +0000
parents 45330e0d2819
children
rev   line source
Chris@202 1 /* iparmq.f -- translated by f2c (version 20061008).
Chris@202 2 You must link the resulting object file with libf2c:
Chris@202 3 on Microsoft Windows system, link with libf2c.lib;
Chris@202 4 on Linux or Unix systems, link with .../path/to/libf2c.a -lm
Chris@202 5 or, if you install libf2c.a in a standard place, with -lf2c -lm
Chris@202 6 -- in that order, at the end of the command line, as in
Chris@202 7 cc *.o -lf2c -lm
Chris@202 8 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
Chris@202 9
Chris@202 10 http://www.netlib.org/f2c/libf2c.zip
Chris@202 11 */
Chris@202 12
Chris@202 13 #include "f2c.h"
Chris@202 14 #include "blaswrap.h"
Chris@202 15
Chris@202 16 integer iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer
Chris@202 17 *ilo, integer *ihi, integer *lwork)
Chris@202 18 {
Chris@202 19 /* System generated locals */
Chris@202 20 integer ret_val, i__1, i__2;
Chris@202 21 real r__1;
Chris@202 22
Chris@202 23 /* Builtin functions */
Chris@202 24 double log(doublereal);
Chris@202 25 integer i_nint(real *);
Chris@202 26
Chris@202 27 /* Local variables */
Chris@202 28 integer nh, ns;
Chris@202 29
Chris@202 30
Chris@202 31 /* -- LAPACK auxiliary routine (version 3.2) -- */
Chris@202 32 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
Chris@202 33 /* November 2006 */
Chris@202 34
Chris@202 35 /* .. Scalar Arguments .. */
Chris@202 36
Chris@202 37 /* Purpose */
Chris@202 38 /* ======= */
Chris@202 39
Chris@202 40 /* This program sets problem and machine dependent parameters */
Chris@202 41 /* useful for xHSEQR and its subroutines. It is called whenever */
Chris@202 42 /* ILAENV is called with 12 <= ISPEC <= 16 */
Chris@202 43
Chris@202 44 /* Arguments */
Chris@202 45 /* ========= */
Chris@202 46
Chris@202 47 /* ISPEC (input) integer scalar */
Chris@202 48 /* ISPEC specifies which tunable parameter IPARMQ should */
Chris@202 49 /* return. */
Chris@202 50
Chris@202 51 /* ISPEC=12: (INMIN) Matrices of order nmin or less */
Chris@202 52 /* are sent directly to xLAHQR, the implicit */
Chris@202 53 /* double shift QR algorithm. NMIN must be */
Chris@202 54 /* at least 11. */
Chris@202 55
Chris@202 56 /* ISPEC=13: (INWIN) Size of the deflation window. */
Chris@202 57 /* This is best set greater than or equal to */
Chris@202 58 /* the number of simultaneous shifts NS. */
Chris@202 59 /* Larger matrices benefit from larger deflation */
Chris@202 60 /* windows. */
Chris@202 61
Chris@202 62 /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
Chris@202 63 /* invest in an (expensive) multi-shift QR sweep. */
Chris@202 64 /* If the aggressive early deflation subroutine */
Chris@202 65 /* finds LD converged eigenvalues from an order */
Chris@202 66 /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
Chris@202 67 /* then the next QR sweep is skipped and early */
Chris@202 68 /* deflation is applied immediately to the */
Chris@202 69 /* remaining active diagonal block. Setting */
Chris@202 70 /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
Chris@202 71 /* multi-shift QR sweep whenever early deflation */
Chris@202 72 /* finds a converged eigenvalue. Setting */
Chris@202 73 /* IPARMQ(ISPEC=14) greater than or equal to 100 */
Chris@202 74 /* prevents TTQRE from skipping a multi-shift */
Chris@202 75 /* QR sweep. */
Chris@202 76
Chris@202 77 /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
Chris@202 78 /* a multi-shift QR iteration. */
Chris@202 79
Chris@202 80 /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
Chris@202 81 /* following meanings. */
Chris@202 82 /* 0: During the multi-shift QR sweep, */
Chris@202 83 /* xLAQR5 does not accumulate reflections and */
Chris@202 84 /* does not use matrix-matrix multiply to */
Chris@202 85 /* update the far-from-diagonal matrix */
Chris@202 86 /* entries. */
Chris@202 87 /* 1: During the multi-shift QR sweep, */
Chris@202 88 /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
Chris@202 89 /* matrix-matrix multiply to update the */
Chris@202 90 /* far-from-diagonal matrix entries. */
Chris@202 91 /* 2: During the multi-shift QR sweep. */
Chris@202 92 /* xLAQR5 accumulates reflections and takes */
Chris@202 93 /* advantage of 2-by-2 block structure during */
Chris@202 94 /* matrix-matrix multiplies. */
Chris@202 95 /* (If xTRMM is slower than xGEMM, then */
Chris@202 96 /* IPARMQ(ISPEC=16)=1 may be more efficient than */
Chris@202 97 /* IPARMQ(ISPEC=16)=2 despite the greater level of */
Chris@202 98 /* arithmetic work implied by the latter choice.) */
Chris@202 99
Chris@202 100 /* NAME (input) character string */
Chris@202 101 /* Name of the calling subroutine */
Chris@202 102
Chris@202 103 /* OPTS (input) character string */
Chris@202 104 /* This is a concatenation of the string arguments to */
Chris@202 105 /* TTQRE. */
Chris@202 106
Chris@202 107 /* N (input) integer scalar */
Chris@202 108 /* N is the order of the Hessenberg matrix H. */
Chris@202 109
Chris@202 110 /* ILO (input) INTEGER */
Chris@202 111 /* IHI (input) INTEGER */
Chris@202 112 /* It is assumed that H is already upper triangular */
Chris@202 113 /* in rows and columns 1:ILO-1 and IHI+1:N. */
Chris@202 114
Chris@202 115 /* LWORK (input) integer scalar */
Chris@202 116 /* The amount of workspace available. */
Chris@202 117
Chris@202 118 /* Further Details */
Chris@202 119 /* =============== */
Chris@202 120
Chris@202 121 /* Little is known about how best to choose these parameters. */
Chris@202 122 /* It is possible to use different values of the parameters */
Chris@202 123 /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
Chris@202 124
Chris@202 125 /* It is probably best to choose different parameters for */
Chris@202 126 /* different matrices and different parameters at different */
Chris@202 127 /* times during the iteration, but this has not been */
Chris@202 128 /* implemented --- yet. */
Chris@202 129
Chris@202 130
Chris@202 131 /* The best choices of most of the parameters depend */
Chris@202 132 /* in an ill-understood way on the relative execution */
Chris@202 133 /* rate of xLAQR3 and xLAQR5 and on the nature of each */
Chris@202 134 /* particular eigenvalue problem. Experiment may be the */
Chris@202 135 /* only practical way to determine which choices are most */
Chris@202 136 /* effective. */
Chris@202 137
Chris@202 138 /* Following is a list of default values supplied by IPARMQ. */
Chris@202 139 /* These defaults may be adjusted in order to attain better */
Chris@202 140 /* performance in any particular computational environment. */
Chris@202 141
Chris@202 142 /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
Chris@202 143 /* Default: 75. (Must be at least 11.) */
Chris@202 144
Chris@202 145 /* IPARMQ(ISPEC=13) Recommended deflation window size. */
Chris@202 146 /* This depends on ILO, IHI and NS, the */
Chris@202 147 /* number of simultaneous shifts returned */
Chris@202 148 /* by IPARMQ(ISPEC=15). The default for */
Chris@202 149 /* (IHI-ILO+1).LE.500 is NS. The default */
Chris@202 150 /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
Chris@202 151
Chris@202 152 /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
Chris@202 153
Chris@202 154 /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */
Chris@202 155 /* a multi-shift QR iteration. */
Chris@202 156
Chris@202 157 /* If IHI-ILO+1 is ... */
Chris@202 158
Chris@202 159 /* greater than ...but less ... the */
Chris@202 160 /* or equal to ... than default is */
Chris@202 161
Chris@202 162 /* 0 30 NS = 2+ */
Chris@202 163 /* 30 60 NS = 4+ */
Chris@202 164 /* 60 150 NS = 10 */
Chris@202 165 /* 150 590 NS = ** */
Chris@202 166 /* 590 3000 NS = 64 */
Chris@202 167 /* 3000 6000 NS = 128 */
Chris@202 168 /* 6000 infinity NS = 256 */
Chris@202 169
Chris@202 170 /* (+) By default matrices of this order are */
Chris@202 171 /* passed to the implicit double shift routine */
Chris@202 172 /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
Chris@202 173 /* values of NS are used only in case of a rare */
Chris@202 174 /* xLAHQR failure. */
Chris@202 175
Chris@202 176 /* (**) The asterisks (**) indicate an ad-hoc */
Chris@202 177 /* function increasing from 10 to 64. */
Chris@202 178
Chris@202 179 /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
Chris@202 180 /* (See ISPEC=16 above for details.) */
Chris@202 181 /* Default: 3. */
Chris@202 182
Chris@202 183 /* ================================================================ */
Chris@202 184 /* .. Parameters .. */
Chris@202 185 /* .. */
Chris@202 186 /* .. Local Scalars .. */
Chris@202 187 /* .. */
Chris@202 188 /* .. Intrinsic Functions .. */
Chris@202 189 /* .. */
Chris@202 190 /* .. Executable Statements .. */
Chris@202 191 if (*ispec == 15 || *ispec == 13 || *ispec == 16) {
Chris@202 192
Chris@202 193 /* ==== Set the number simultaneous shifts ==== */
Chris@202 194
Chris@202 195 nh = *ihi - *ilo + 1;
Chris@202 196 ns = 2;
Chris@202 197 if (nh >= 30) {
Chris@202 198 ns = 4;
Chris@202 199 }
Chris@202 200 if (nh >= 60) {
Chris@202 201 ns = 10;
Chris@202 202 }
Chris@202 203 if (nh >= 150) {
Chris@202 204 /* Computing MAX */
Chris@202 205 r__1 = log((real) nh) / log(2.f);
Chris@202 206 i__1 = 10, i__2 = nh / i_nint(&r__1);
Chris@202 207 ns = max(i__1,i__2);
Chris@202 208 }
Chris@202 209 if (nh >= 590) {
Chris@202 210 ns = 64;
Chris@202 211 }
Chris@202 212 if (nh >= 3000) {
Chris@202 213 ns = 128;
Chris@202 214 }
Chris@202 215 if (nh >= 6000) {
Chris@202 216 ns = 256;
Chris@202 217 }
Chris@202 218 /* Computing MAX */
Chris@202 219 i__1 = 2, i__2 = ns - ns % 2;
Chris@202 220 ns = max(i__1,i__2);
Chris@202 221 }
Chris@202 222
Chris@202 223 if (*ispec == 12) {
Chris@202 224
Chris@202 225
Chris@202 226 /* ===== Matrices of order smaller than NMIN get sent */
Chris@202 227 /* . to xLAHQR, the classic double shift algorithm. */
Chris@202 228 /* . This must be at least 11. ==== */
Chris@202 229
Chris@202 230 ret_val = 75;
Chris@202 231
Chris@202 232 } else if (*ispec == 14) {
Chris@202 233
Chris@202 234 /* ==== INIBL: skip a multi-shift qr iteration and */
Chris@202 235 /* . whenever aggressive early deflation finds */
Chris@202 236 /* . at least (NIBBLE*(window size)/100) deflations. ==== */
Chris@202 237
Chris@202 238 ret_val = 14;
Chris@202 239
Chris@202 240 } else if (*ispec == 15) {
Chris@202 241
Chris@202 242 /* ==== NSHFTS: The number of simultaneous shifts ===== */
Chris@202 243
Chris@202 244 ret_val = ns;
Chris@202 245
Chris@202 246 } else if (*ispec == 13) {
Chris@202 247
Chris@202 248 /* ==== NW: deflation window size. ==== */
Chris@202 249
Chris@202 250 if (nh <= 500) {
Chris@202 251 ret_val = ns;
Chris@202 252 } else {
Chris@202 253 ret_val = ns * 3 / 2;
Chris@202 254 }
Chris@202 255
Chris@202 256 } else if (*ispec == 16) {
Chris@202 257
Chris@202 258 /* ==== IACC22: Whether to accumulate reflections */
Chris@202 259 /* . before updating the far-from-diagonal elements */
Chris@202 260 /* . and whether to use 2-by-2 block structure while */
Chris@202 261 /* . doing it. A small amount of work could be saved */
Chris@202 262 /* . by making this choice dependent also upon the */
Chris@202 263 /* . NH=IHI-ILO+1. */
Chris@202 264
Chris@202 265 ret_val = 0;
Chris@202 266 if (ns >= 14) {
Chris@202 267 ret_val = 1;
Chris@202 268 }
Chris@202 269 if (ns >= 14) {
Chris@202 270 ret_val = 2;
Chris@202 271 }
Chris@202 272
Chris@202 273 } else {
Chris@202 274 /* ===== invalid value of ispec ===== */
Chris@202 275 ret_val = -1;
Chris@202 276
Chris@202 277 }
Chris@202 278
Chris@202 279 /* ==== End of IPARMQ ==== */
Chris@202 280
Chris@202 281 return ret_val;
Chris@202 282 } /* iparmq_ */