/* iparmq.f -- translated by f2c (version 20061008).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/
c@427
|
12
|
c@427
|
13 #include "f2c.h"
|
c@427
|
14 #include "blaswrap.h"
|
c@427
|
15
|
c@427
|
16 integer iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer
|
c@427
|
17 *ilo, integer *ihi, integer *lwork)
|
c@427
|
18 {
|
c@427
|
19 /* System generated locals */
|
c@427
|
20 integer ret_val, i__1, i__2;
|
c@427
|
21 real r__1;
|
c@427
|
22
|
c@427
|
23 /* Builtin functions */
|
c@427
|
24 double log(doublereal);
|
c@427
|
25 integer i_nint(real *);
|
c@427
|
26
|
c@427
|
27 /* Local variables */
|
c@427
|
28 integer nh, ns;
|
c@427
|
29
|
c@427
|
30
|
c@427
|
31 /* -- LAPACK auxiliary routine (version 3.2) -- */
|
c@427
|
32 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
|
c@427
|
33 /* November 2006 */
|
c@427
|
34
|
c@427
|
35 /* .. Scalar Arguments .. */
|
c@427
|
36
|
c@427
|
37 /* Purpose */
|
c@427
|
38 /* ======= */
|
c@427
|
39
|
c@427
|
40 /* This program sets problem and machine dependent parameters */
|
c@427
|
41 /* useful for xHSEQR and its subroutines. It is called whenever */
|
c@427
|
42 /* ILAENV is called with 12 <= ISPEC <= 16 */
|
c@427
|
43
|
c@427
|
44 /* Arguments */
|
c@427
|
45 /* ========= */
|
c@427
|
46
|
c@427
|
47 /* ISPEC (input) integer scalar */
|
c@427
|
48 /* ISPEC specifies which tunable parameter IPARMQ should */
|
c@427
|
49 /* return. */
|
c@427
|
50
|
c@427
|
51 /* ISPEC=12: (INMIN) Matrices of order nmin or less */
|
c@427
|
52 /* are sent directly to xLAHQR, the implicit */
|
c@427
|
53 /* double shift QR algorithm. NMIN must be */
|
c@427
|
54 /* at least 11. */
|
c@427
|
55
|
c@427
|
56 /* ISPEC=13: (INWIN) Size of the deflation window. */
|
c@427
|
57 /* This is best set greater than or equal to */
|
c@427
|
58 /* the number of simultaneous shifts NS. */
|
c@427
|
59 /* Larger matrices benefit from larger deflation */
|
c@427
|
60 /* windows. */
|
c@427
|
61
|
c@427
|
62 /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
|
c@427
|
63 /* invest in an (expensive) multi-shift QR sweep. */
|
c@427
|
64 /* If the aggressive early deflation subroutine */
|
c@427
|
65 /* finds LD converged eigenvalues from an order */
|
c@427
|
66 /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
|
c@427
|
67 /* then the next QR sweep is skipped and early */
|
c@427
|
68 /* deflation is applied immediately to the */
|
c@427
|
69 /* remaining active diagonal block. Setting */
|
c@427
|
70 /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
|
c@427
|
71 /* multi-shift QR sweep whenever early deflation */
|
c@427
|
72 /* finds a converged eigenvalue. Setting */
|
c@427
|
73 /* IPARMQ(ISPEC=14) greater than or equal to 100 */
|
c@427
|
74 /* prevents TTQRE from skipping a multi-shift */
|
c@427
|
75 /* QR sweep. */
|
c@427
|
76
|
c@427
|
77 /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
|
c@427
|
78 /* a multi-shift QR iteration. */
|
c@427
|
79
|
c@427
|
80 /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
|
c@427
|
81 /* following meanings. */
|
c@427
|
82 /* 0: During the multi-shift QR sweep, */
|
c@427
|
83 /* xLAQR5 does not accumulate reflections and */
|
c@427
|
84 /* does not use matrix-matrix multiply to */
|
c@427
|
85 /* update the far-from-diagonal matrix */
|
c@427
|
86 /* entries. */
|
c@427
|
87 /* 1: During the multi-shift QR sweep, */
|
c@427
|
88 /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
|
c@427
|
89 /* matrix-matrix multiply to update the */
|
c@427
|
90 /* far-from-diagonal matrix entries. */
|
c@427
|
91 /* 2: During the multi-shift QR sweep. */
|
c@427
|
92 /* xLAQR5 accumulates reflections and takes */
|
c@427
|
93 /* advantage of 2-by-2 block structure during */
|
c@427
|
94 /* matrix-matrix multiplies. */
|
c@427
|
95 /* (If xTRMM is slower than xGEMM, then */
|
c@427
|
96 /* IPARMQ(ISPEC=16)=1 may be more efficient than */
|
c@427
|
97 /* IPARMQ(ISPEC=16)=2 despite the greater level of */
|
c@427
|
98 /* arithmetic work implied by the latter choice.) */
|
c@427
|
99
|
c@427
|
100 /* NAME (input) character string */
|
c@427
|
101 /* Name of the calling subroutine */
|
c@427
|
102
|
c@427
|
103 /* OPTS (input) character string */
|
c@427
|
104 /* This is a concatenation of the string arguments to */
|
c@427
|
105 /* TTQRE. */
|
c@427
|
106
|
c@427
|
107 /* N (input) integer scalar */
|
c@427
|
108 /* N is the order of the Hessenberg matrix H. */
|
c@427
|
109
|
c@427
|
110 /* ILO (input) INTEGER */
|
c@427
|
111 /* IHI (input) INTEGER */
|
c@427
|
112 /* It is assumed that H is already upper triangular */
|
c@427
|
113 /* in rows and columns 1:ILO-1 and IHI+1:N. */
|
c@427
|
114
|
c@427
|
115 /* LWORK (input) integer scalar */
|
c@427
|
116 /* The amount of workspace available. */
|
c@427
|
117
|
c@427
|
118 /* Further Details */
|
c@427
|
119 /* =============== */
|
c@427
|
120
|
c@427
|
121 /* Little is known about how best to choose these parameters. */
|
c@427
|
122 /* It is possible to use different values of the parameters */
|
c@427
|
123 /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
|
c@427
|
124
|
c@427
|
125 /* It is probably best to choose different parameters for */
|
c@427
|
126 /* different matrices and different parameters at different */
|
c@427
|
127 /* times during the iteration, but this has not been */
|
c@427
|
128 /* implemented --- yet. */
|
c@427
|
129
|
c@427
|
130
|
c@427
|
131 /* The best choices of most of the parameters depend */
|
c@427
|
132 /* in an ill-understood way on the relative execution */
|
c@427
|
133 /* rate of xLAQR3 and xLAQR5 and on the nature of each */
|
c@427
|
134 /* particular eigenvalue problem. Experiment may be the */
|
c@427
|
135 /* only practical way to determine which choices are most */
|
c@427
|
136 /* effective. */
|
c@427
|
137
|
c@427
|
138 /* Following is a list of default values supplied by IPARMQ. */
|
c@427
|
139 /* These defaults may be adjusted in order to attain better */
|
c@427
|
140 /* performance in any particular computational environment. */
|
c@427
|
141
|
c@427
|
142 /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
|
c@427
|
143 /* Default: 75. (Must be at least 11.) */
|
c@427
|
144
|
c@427
|
145 /* IPARMQ(ISPEC=13) Recommended deflation window size. */
|
c@427
|
146 /* This depends on ILO, IHI and NS, the */
|
c@427
|
147 /* number of simultaneous shifts returned */
|
c@427
|
148 /* by IPARMQ(ISPEC=15). The default for */
|
c@427
|
149 /* (IHI-ILO+1).LE.500 is NS. The default */
|
c@427
|
150 /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
|
c@427
|
151
|
c@427
|
152 /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
|
c@427
|
153
|
c@427
|
154 /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */
|
c@427
|
155 /* a multi-shift QR iteration. */
|
c@427
|
156
|
c@427
|
157 /* If IHI-ILO+1 is ... */
|
c@427
|
158
|
c@427
|
159 /* greater than ...but less ... the */
|
c@427
|
160 /* or equal to ... than default is */
|
c@427
|
161
|
c@427
|
162 /* 0 30 NS = 2+ */
|
c@427
|
163 /* 30 60 NS = 4+ */
|
c@427
|
164 /* 60 150 NS = 10 */
|
c@427
|
165 /* 150 590 NS = ** */
|
c@427
|
166 /* 590 3000 NS = 64 */
|
c@427
|
167 /* 3000 6000 NS = 128 */
|
c@427
|
168 /* 6000 infinity NS = 256 */
|
c@427
|
169
|
c@427
|
170 /* (+) By default matrices of this order are */
|
c@427
|
171 /* passed to the implicit double shift routine */
|
c@427
|
172 /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
|
c@427
|
173 /* values of NS are used only in case of a rare */
|
c@427
|
174 /* xLAHQR failure. */
|
c@427
|
175
|
c@427
|
176 /* (**) The asterisks (**) indicate an ad-hoc */
|
c@427
|
177 /* function increasing from 10 to 64. */
|
c@427
|
178
|
c@427
|
179 /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
|
c@427
|
180 /* (See ISPEC=16 above for details.) */
|
c@427
|
181 /* Default: 3. */
|
c@427
|
182
|
c@427
|
183 /* ================================================================ */
|
c@427
|
184 /* .. Parameters .. */
|
c@427
|
185 /* .. */
|
c@427
|
186 /* .. Local Scalars .. */
|
c@427
|
187 /* .. */
|
c@427
|
188 /* .. Intrinsic Functions .. */
|
c@427
|
189 /* .. */
|
c@427
|
190 /* .. Executable Statements .. */
|
c@427
|
191 if (*ispec == 15 || *ispec == 13 || *ispec == 16) {
|
c@427
|
192
|
c@427
|
193 /* ==== Set the number simultaneous shifts ==== */
|
c@427
|
194
|
c@427
|
195 nh = *ihi - *ilo + 1;
|
c@427
|
196 ns = 2;
|
c@427
|
197 if (nh >= 30) {
|
c@427
|
198 ns = 4;
|
c@427
|
199 }
|
c@427
|
200 if (nh >= 60) {
|
c@427
|
201 ns = 10;
|
c@427
|
202 }
|
c@427
|
203 if (nh >= 150) {
|
c@427
|
204 /* Computing MAX */
|
c@427
|
205 r__1 = log((real) nh) / log(2.f);
|
c@427
|
206 i__1 = 10, i__2 = nh / i_nint(&r__1);
|
c@427
|
207 ns = max(i__1,i__2);
|
c@427
|
208 }
|
c@427
|
209 if (nh >= 590) {
|
c@427
|
210 ns = 64;
|
c@427
|
211 }
|
c@427
|
212 if (nh >= 3000) {
|
c@427
|
213 ns = 128;
|
c@427
|
214 }
|
c@427
|
215 if (nh >= 6000) {
|
c@427
|
216 ns = 256;
|
c@427
|
217 }
|
c@427
|
218 /* Computing MAX */
|
c@427
|
219 i__1 = 2, i__2 = ns - ns % 2;
|
c@427
|
220 ns = max(i__1,i__2);
|
c@427
|
221 }
|
c@427
|
222
|
c@427
|
223 if (*ispec == 12) {
|
c@427
|
224
|
c@427
|
225
|
c@427
|
226 /* ===== Matrices of order smaller than NMIN get sent */
|
c@427
|
227 /* . to xLAHQR, the classic double shift algorithm. */
|
c@427
|
228 /* . This must be at least 11. ==== */
|
c@427
|
229
|
c@427
|
230 ret_val = 75;
|
c@427
|
231
|
c@427
|
232 } else if (*ispec == 14) {
|
c@427
|
233
|
c@427
|
234 /* ==== INIBL: skip a multi-shift qr iteration and */
|
c@427
|
235 /* . whenever aggressive early deflation finds */
|
c@427
|
236 /* . at least (NIBBLE*(window size)/100) deflations. ==== */
|
c@427
|
237
|
c@427
|
238 ret_val = 14;
|
c@427
|
239
|
c@427
|
240 } else if (*ispec == 15) {
|
c@427
|
241
|
c@427
|
242 /* ==== NSHFTS: The number of simultaneous shifts ===== */
|
c@427
|
243
|
c@427
|
244 ret_val = ns;
|
c@427
|
245
|
c@427
|
246 } else if (*ispec == 13) {
|
c@427
|
247
|
c@427
|
248 /* ==== NW: deflation window size. ==== */
|
c@427
|
249
|
c@427
|
250 if (nh <= 500) {
|
c@427
|
251 ret_val = ns;
|
c@427
|
252 } else {
|
c@427
|
253 ret_val = ns * 3 / 2;
|
c@427
|
254 }
|
c@427
|
255
|
c@427
|
256 } else if (*ispec == 16) {
|
c@427
|
257
|
c@427
|
258 /* ==== IACC22: Whether to accumulate reflections */
|
c@427
|
259 /* . before updating the far-from-diagonal elements */
|
c@427
|
260 /* . and whether to use 2-by-2 block structure while */
|
c@427
|
261 /* . doing it. A small amount of work could be saved */
|
c@427
|
262 /* . by making this choice dependent also upon the */
|
c@427
|
263 /* . NH=IHI-ILO+1. */
|
c@427
|
264
|
c@427
|
265 ret_val = 0;
|
c@427
|
266 if (ns >= 14) {
|
c@427
|
267 ret_val = 1;
|
c@427
|
268 }
|
c@427
|
269 if (ns >= 14) {
|
c@427
|
270 ret_val = 2;
|
c@427
|
271 }
|
c@427
|
272
|
c@427
|
273 } else {
|
c@427
|
274 /* ===== invalid value of ispec ===== */
|
c@427
|
275 ret_val = -1;
|
c@427
|
276
|
c@427
|
277 }
|
c@427
|
278
|
c@427
|
279 /* ==== End of IPARMQ ==== */
|
c@427
|
280
|
c@427
|
281 return ret_val;
|
c@427
|
282 } /* iparmq_ */
|