Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.8/kernel/planner.c @ 82:d0c2a83c1364
Add FFTW 3.3.8 source, and a Linux build
author | Chris Cannam |
---|---|
date | Tue, 19 Nov 2019 14:52:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
81:7029a4916348 | 82:d0c2a83c1364 |
---|---|
1 /* | |
2 * Copyright (c) 2000 Matteo Frigo | |
3 * Copyright (c) 2000 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 #include "kernel/ifftw.h" | |
22 #include <string.h> | |
23 | |
24 /* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU | |
25 program in English, because English is the one language that nearly | |
26 all programmers in all countries can read." | |
27 | |
28 ingemisco tanquam reus | |
29 culpa rubet vultus meus | |
30 supplicanti parce [rms] | |
31 */ | |
32 | |
33 #define VALIDP(solution) ((solution)->flags.hash_info & H_VALID) | |
34 #define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE) | |
35 #define SLVNDX(solution) ((solution)->flags.slvndx) | |
36 #define BLISS(flags) (((flags).hash_info) & BLESSING) | |
37 #define INFEASIBLE_SLVNDX ((1U<<BITS_FOR_SLVNDX)-1) | |
38 | |
39 | |
40 #define MAXNAM 64 /* maximum length of registrar's name. | |
41 Used for reading wisdom. There is no point | |
42 in doing this right */ | |
43 | |
44 | |
45 #ifdef FFTW_DEBUG | |
46 static void check(hashtab *ht); | |
47 #endif | |
48 | |
49 /* x <= y */ | |
50 #define LEQ(x, y) (((x) & (y)) == (x)) | |
51 | |
52 /* A subsumes B */ | |
53 static int subsumes(const flags_t *a, unsigned slvndx_a, const flags_t *b) | |
54 { | |
55 if (slvndx_a != INFEASIBLE_SLVNDX) { | |
56 A(a->timelimit_impatience == 0); | |
57 return (LEQ(a->u, b->u) && LEQ(b->l, a->l)); | |
58 } else { | |
59 return (LEQ(a->l, b->l) | |
60 && a->timelimit_impatience <= b->timelimit_impatience); | |
61 } | |
62 } | |
63 | |
64 static unsigned addmod(unsigned a, unsigned b, unsigned p) | |
65 { | |
66 /* gcc-2.95/sparc produces incorrect code for the fast version below. */ | |
67 #if defined(__sparc__) && defined(__GNUC__) | |
68 /* slow version */ | |
69 return (a + b) % p; | |
70 #else | |
71 /* faster version */ | |
72 unsigned c = a + b; | |
73 return c >= p ? c - p : c; | |
74 #endif | |
75 } | |
76 | |
77 /* | |
78 slvdesc management: | |
79 */ | |
80 static void sgrow(planner *ego) | |
81 { | |
82 unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4; | |
83 slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS); | |
84 slvdesc *otab = ego->slvdescs; | |
85 unsigned i; | |
86 | |
87 ego->slvdescs = ntab; | |
88 ego->slvdescsiz = nsiz; | |
89 for (i = 0; i < osiz; ++i) | |
90 ntab[i] = otab[i]; | |
91 X(ifree0)(otab); | |
92 } | |
93 | |
94 static void register_solver(planner *ego, solver *s) | |
95 { | |
96 slvdesc *n; | |
97 int kind; | |
98 | |
99 if (s) { /* add s to solver list */ | |
100 X(solver_use)(s); | |
101 | |
102 A(ego->nslvdesc < INFEASIBLE_SLVNDX); | |
103 if (ego->nslvdesc >= ego->slvdescsiz) | |
104 sgrow(ego); | |
105 | |
106 n = ego->slvdescs + ego->nslvdesc; | |
107 | |
108 n->slv = s; | |
109 n->reg_nam = ego->cur_reg_nam; | |
110 n->reg_id = ego->cur_reg_id++; | |
111 | |
112 A(strlen(n->reg_nam) < MAXNAM); | |
113 n->nam_hash = X(hash)(n->reg_nam); | |
114 | |
115 kind = s->adt->problem_kind; | |
116 n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind]; | |
117 ego->slvdescs_for_problem_kind[kind] = (int)/*from unsigned*/ego->nslvdesc; | |
118 | |
119 ego->nslvdesc++; | |
120 } | |
121 } | |
122 | |
123 static unsigned slookup(planner *ego, char *nam, int id) | |
124 { | |
125 unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */ | |
126 FORALL_SOLVERS(ego, s, sp, { | |
127 UNUSED(s); | |
128 if (sp->reg_id == id && sp->nam_hash == h | |
129 && !strcmp(sp->reg_nam, nam)) | |
130 return (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); | |
131 }); | |
132 return INFEASIBLE_SLVNDX; | |
133 } | |
134 | |
135 /* Compute a MD5 hash of the configuration of the planner. | |
136 We store it into the wisdom file to make absolutely sure that | |
137 we are reading wisdom that is applicable */ | |
138 static void signature_of_configuration(md5 *m, planner *ego) | |
139 { | |
140 X(md5begin)(m); | |
141 X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ | |
142 FORALL_SOLVERS(ego, s, sp, { | |
143 UNUSED(s); | |
144 X(md5int)(m, sp->reg_id); | |
145 X(md5puts)(m, sp->reg_nam); | |
146 }); | |
147 X(md5end)(m); | |
148 } | |
149 | |
150 /* | |
151 md5-related stuff: | |
152 */ | |
153 | |
154 /* first hash function */ | |
155 static unsigned h1(const hashtab *ht, const md5sig s) | |
156 { | |
157 unsigned h = s[0] % ht->hashsiz; | |
158 A(h == (s[0] % ht->hashsiz)); | |
159 return h; | |
160 } | |
161 | |
162 /* second hash function (for double hashing) */ | |
163 static unsigned h2(const hashtab *ht, const md5sig s) | |
164 { | |
165 unsigned h = 1U + s[1] % (ht->hashsiz - 1); | |
166 A(h == (1U + s[1] % (ht->hashsiz - 1))); | |
167 return h; | |
168 } | |
169 | |
170 static void md5hash(md5 *m, const problem *p, const planner *plnr) | |
171 { | |
172 X(md5begin)(m); | |
173 X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ | |
174 X(md5int)(m, plnr->nthr); | |
175 p->adt->hash(p, m); | |
176 X(md5end)(m); | |
177 } | |
178 | |
179 static int md5eq(const md5sig a, const md5sig b) | |
180 { | |
181 return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; | |
182 } | |
183 | |
184 static void sigcpy(const md5sig a, md5sig b) | |
185 { | |
186 b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; | |
187 } | |
188 | |
189 /* | |
190 memoization routines : | |
191 */ | |
192 | |
193 /* | |
194 liber scriptus proferetur | |
195 in quo totum continetur | |
196 unde mundus iudicetur | |
197 */ | |
198 struct solution_s { | |
199 md5sig s; | |
200 flags_t flags; | |
201 }; | |
202 | |
203 static solution *htab_lookup(hashtab *ht, const md5sig s, | |
204 const flags_t *flagsp) | |
205 { | |
206 unsigned g, h = h1(ht, s), d = h2(ht, s); | |
207 solution *best = 0; | |
208 | |
209 ++ht->lookup; | |
210 | |
211 /* search all entries that match; select the one with | |
212 the lowest flags.u */ | |
213 /* This loop may potentially traverse the whole table, since at | |
214 least one element is guaranteed to be !LIVEP, but all elements | |
215 may be VALIDP. Hence, we stop after at the first invalid | |
216 element or after traversing the whole table. */ | |
217 g = h; | |
218 do { | |
219 solution *l = ht->solutions + g; | |
220 ++ht->lookup_iter; | |
221 if (VALIDP(l)) { | |
222 if (LIVEP(l) | |
223 && md5eq(s, l->s) | |
224 && subsumes(&l->flags, SLVNDX(l), flagsp) ) { | |
225 if (!best || LEQ(l->flags.u, best->flags.u)) | |
226 best = l; | |
227 } | |
228 } else | |
229 break; | |
230 | |
231 g = addmod(g, d, ht->hashsiz); | |
232 } while (g != h); | |
233 | |
234 if (best) | |
235 ++ht->succ_lookup; | |
236 return best; | |
237 } | |
238 | |
239 static solution *hlookup(planner *ego, const md5sig s, | |
240 const flags_t *flagsp) | |
241 { | |
242 solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp); | |
243 if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp); | |
244 return sol; | |
245 } | |
246 | |
247 static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp, | |
248 unsigned slvndx, solution *slot) | |
249 { | |
250 ++ht->insert; | |
251 ++ht->nelem; | |
252 A(!LIVEP(slot)); | |
253 slot->flags.u = flagsp->u; | |
254 slot->flags.l = flagsp->l; | |
255 slot->flags.timelimit_impatience = flagsp->timelimit_impatience; | |
256 slot->flags.hash_info |= H_VALID | H_LIVE; | |
257 SLVNDX(slot) = slvndx; | |
258 | |
259 /* keep this check enabled in case we add so many solvers | |
260 that the bitfield overflows */ | |
261 CK(SLVNDX(slot) == slvndx); | |
262 sigcpy(s, slot->s); | |
263 } | |
264 | |
265 static void kill_slot(hashtab *ht, solution *slot) | |
266 { | |
267 A(LIVEP(slot)); /* ==> */ A(VALIDP(slot)); | |
268 | |
269 --ht->nelem; | |
270 slot->flags.hash_info = H_VALID; | |
271 } | |
272 | |
273 static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp, | |
274 unsigned slvndx) | |
275 { | |
276 solution *l; | |
277 unsigned g, h = h1(ht, s), d = h2(ht, s); | |
278 | |
279 ++ht->insert_unknown; | |
280 | |
281 /* search for nonfull slot */ | |
282 for (g = h; ; g = addmod(g, d, ht->hashsiz)) { | |
283 ++ht->insert_iter; | |
284 l = ht->solutions + g; | |
285 if (!LIVEP(l)) break; | |
286 A((g + d) % ht->hashsiz != h); | |
287 } | |
288 | |
289 fill_slot(ht, s, flagsp, slvndx, l); | |
290 } | |
291 | |
292 static void rehash(hashtab *ht, unsigned nsiz) | |
293 { | |
294 unsigned osiz = ht->hashsiz, h; | |
295 solution *osol = ht->solutions, *nsol; | |
296 | |
297 nsiz = (unsigned)X(next_prime)((INT)nsiz); | |
298 nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT); | |
299 ++ht->nrehash; | |
300 | |
301 /* init new table */ | |
302 for (h = 0; h < nsiz; ++h) | |
303 nsol[h].flags.hash_info = 0; | |
304 | |
305 /* install new table */ | |
306 ht->hashsiz = nsiz; | |
307 ht->solutions = nsol; | |
308 ht->nelem = 0; | |
309 | |
310 /* copy table */ | |
311 for (h = 0; h < osiz; ++h) { | |
312 solution *l = osol + h; | |
313 if (LIVEP(l)) | |
314 hinsert0(ht, l->s, &l->flags, SLVNDX(l)); | |
315 } | |
316 | |
317 X(ifree0)(osol); | |
318 } | |
319 | |
320 static unsigned minsz(unsigned nelem) | |
321 { | |
322 return 1U + nelem + nelem / 8U; | |
323 } | |
324 | |
325 static unsigned nextsz(unsigned nelem) | |
326 { | |
327 return minsz(minsz(nelem)); | |
328 } | |
329 | |
330 static void hgrow(hashtab *ht) | |
331 { | |
332 unsigned nelem = ht->nelem; | |
333 if (minsz(nelem) >= ht->hashsiz) | |
334 rehash(ht, nextsz(nelem)); | |
335 } | |
336 | |
337 #if 0 | |
338 /* shrink the hash table, never used */ | |
339 static void hshrink(hashtab *ht) | |
340 { | |
341 unsigned nelem = ht->nelem; | |
342 /* always rehash after deletions */ | |
343 rehash(ht, nextsz(nelem)); | |
344 } | |
345 #endif | |
346 | |
347 static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp, | |
348 unsigned slvndx) | |
349 { | |
350 unsigned g, h = h1(ht, s), d = h2(ht, s); | |
351 solution *first = 0; | |
352 | |
353 /* Remove all entries that are subsumed by the new one. */ | |
354 /* This loop may potentially traverse the whole table, since at | |
355 least one element is guaranteed to be !LIVEP, but all elements | |
356 may be VALIDP. Hence, we stop after at the first invalid | |
357 element or after traversing the whole table. */ | |
358 g = h; | |
359 do { | |
360 solution *l = ht->solutions + g; | |
361 ++ht->insert_iter; | |
362 if (VALIDP(l)) { | |
363 if (LIVEP(l) && md5eq(s, l->s)) { | |
364 if (subsumes(flagsp, slvndx, &l->flags)) { | |
365 if (!first) first = l; | |
366 kill_slot(ht, l); | |
367 } else { | |
368 /* It is an error to insert an element that | |
369 is subsumed by an existing entry. */ | |
370 A(!subsumes(&l->flags, SLVNDX(l), flagsp)); | |
371 } | |
372 } | |
373 } else | |
374 break; | |
375 | |
376 g = addmod(g, d, ht->hashsiz); | |
377 } while (g != h); | |
378 | |
379 if (first) { | |
380 /* overwrite FIRST */ | |
381 fill_slot(ht, s, flagsp, slvndx, first); | |
382 } else { | |
383 /* create a new entry */ | |
384 hgrow(ht); | |
385 hinsert0(ht, s, flagsp, slvndx); | |
386 } | |
387 } | |
388 | |
389 static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp, | |
390 unsigned slvndx) | |
391 { | |
392 htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed, | |
393 s, flagsp, slvndx ); | |
394 } | |
395 | |
396 | |
397 static void invoke_hook(planner *ego, plan *pln, const problem *p, | |
398 int optimalp) | |
399 { | |
400 if (ego->hook) | |
401 ego->hook(ego, pln, p, optimalp); | |
402 } | |
403 | |
404 #ifdef FFTW_RANDOM_ESTIMATOR | |
405 /* a "random" estimate, used for debugging to generate "random" | |
406 plans, albeit from a deterministic seed. */ | |
407 | |
408 unsigned X(random_estimate_seed) = 0; | |
409 | |
410 static double random_estimate(const planner *ego, const plan *pln, | |
411 const problem *p) | |
412 { | |
413 md5 m; | |
414 X(md5begin)(&m); | |
415 X(md5unsigned)(&m, X(random_estimate_seed)); | |
416 X(md5int)(&m, ego->nthr); | |
417 p->adt->hash(p, &m); | |
418 X(md5putb)(&m, &pln->ops, sizeof(pln->ops)); | |
419 X(md5putb)(&m, &pln->adt, sizeof(pln->adt)); | |
420 X(md5end)(&m); | |
421 return ego->cost_hook ? ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0]; | |
422 } | |
423 | |
424 #endif | |
425 | |
426 double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p) | |
427 { | |
428 double cost = | |
429 + pln->ops.add | |
430 + pln->ops.mul | |
431 | |
432 #if HAVE_FMA | |
433 + pln->ops.fma | |
434 #else | |
435 + 2 * pln->ops.fma | |
436 #endif | |
437 | |
438 + pln->ops.other; | |
439 if (ego->cost_hook) | |
440 cost = ego->cost_hook(p, cost, COST_MAX); | |
441 return cost; | |
442 } | |
443 | |
444 static void evaluate_plan(planner *ego, plan *pln, const problem *p) | |
445 { | |
446 if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) { | |
447 ego->nplan++; | |
448 | |
449 if (ESTIMATEP(ego)) { | |
450 estimate: | |
451 /* heuristic */ | |
452 #ifdef FFTW_RANDOM_ESTIMATOR | |
453 pln->pcost = random_estimate(ego, pln, p); | |
454 ego->epcost += X(iestimate_cost)(ego, pln, p); | |
455 #else | |
456 pln->pcost = X(iestimate_cost)(ego, pln, p); | |
457 ego->epcost += pln->pcost; | |
458 #endif | |
459 } else { | |
460 double t = X(measure_execution_time)(ego, pln, p); | |
461 | |
462 if (t < 0) { /* unavailable cycle counter */ | |
463 /* Real programmers can write FORTRAN in any language */ | |
464 goto estimate; | |
465 } | |
466 | |
467 pln->pcost = t; | |
468 ego->pcost += t; | |
469 ego->need_timeout_check = 1; | |
470 } | |
471 } | |
472 | |
473 invoke_hook(ego, pln, p, 0); | |
474 } | |
475 | |
476 /* maintain dynamic scoping of flags, nthr: */ | |
477 static plan *invoke_solver(planner *ego, const problem *p, solver *s, | |
478 const flags_t *nflags) | |
479 { | |
480 flags_t flags = ego->flags; | |
481 int nthr = ego->nthr; | |
482 plan *pln; | |
483 ego->flags = *nflags; | |
484 PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; | |
485 A(p->adt->problem_kind == s->adt->problem_kind); | |
486 pln = s->adt->mkplan(s, p, ego); | |
487 ego->nthr = nthr; | |
488 ego->flags = flags; | |
489 return pln; | |
490 } | |
491 | |
492 /* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */ | |
493 static int timeout_p(planner *ego, const problem *p) | |
494 { | |
495 /* do not timeout when estimating. First, the estimator is the | |
496 planner of last resort. Second, calling X(elapsed_since)() is | |
497 slower than estimating */ | |
498 if (!ESTIMATEP(ego)) { | |
499 /* do not assume that X(elapsed_since)() is monotonic */ | |
500 if (ego->timed_out) { | |
501 A(ego->need_timeout_check); | |
502 return 1; | |
503 } | |
504 | |
505 if (ego->timelimit >= 0 && | |
506 X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) { | |
507 ego->timed_out = 1; | |
508 ego->need_timeout_check = 1; | |
509 return 1; | |
510 } | |
511 } | |
512 | |
513 A(!ego->timed_out); | |
514 ego->need_timeout_check = 0; | |
515 return 0; | |
516 } | |
517 | |
518 static plan *search0(planner *ego, const problem *p, unsigned *slvndx, | |
519 const flags_t *flagsp) | |
520 { | |
521 plan *best = 0; | |
522 int best_not_yet_timed = 1; | |
523 | |
524 /* Do not start a search if the planner timed out. This check is | |
525 necessary, lest the relaxation mechanism kick in */ | |
526 if (timeout_p(ego, p)) | |
527 return 0; | |
528 | |
529 FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, { | |
530 plan *pln; | |
531 | |
532 pln = invoke_solver(ego, p, s, flagsp); | |
533 | |
534 if (ego->need_timeout_check) | |
535 if (timeout_p(ego, p)) { | |
536 X(plan_destroy_internal)(pln); | |
537 X(plan_destroy_internal)(best); | |
538 return 0; | |
539 } | |
540 | |
541 if (pln) { | |
542 /* read COULD_PRUNE_NOW_P because PLN may be destroyed | |
543 before we use COULD_PRUNE_NOW_P */ | |
544 int could_prune_now_p = pln->could_prune_now_p; | |
545 | |
546 if (best) { | |
547 if (best_not_yet_timed) { | |
548 evaluate_plan(ego, best, p); | |
549 best_not_yet_timed = 0; | |
550 } | |
551 evaluate_plan(ego, pln, p); | |
552 if (pln->pcost < best->pcost) { | |
553 X(plan_destroy_internal)(best); | |
554 best = pln; | |
555 *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); | |
556 } else { | |
557 X(plan_destroy_internal)(pln); | |
558 } | |
559 } else { | |
560 best = pln; | |
561 *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); | |
562 } | |
563 | |
564 if (ALLOW_PRUNINGP(ego) && could_prune_now_p) | |
565 break; | |
566 } | |
567 }); | |
568 | |
569 return best; | |
570 } | |
571 | |
572 static plan *search(planner *ego, const problem *p, unsigned *slvndx, | |
573 flags_t *flagsp) | |
574 { | |
575 plan *pln = 0; | |
576 unsigned i; | |
577 | |
578 /* relax impatience in this order: */ | |
579 static const unsigned relax_tab[] = { | |
580 0, /* relax nothing */ | |
581 NO_VRECURSE, | |
582 NO_FIXED_RADIX_LARGE_N, | |
583 NO_SLOW, | |
584 NO_UGLY | |
585 }; | |
586 | |
587 unsigned l_orig = flagsp->l; | |
588 unsigned x = flagsp->u; | |
589 | |
590 /* guaranteed to be different from X */ | |
591 unsigned last_x = ~x; | |
592 | |
593 for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) { | |
594 if (LEQ(l_orig, x & ~relax_tab[i])) | |
595 x = x & ~relax_tab[i]; | |
596 | |
597 if (x != last_x) { | |
598 last_x = x; | |
599 flagsp->l = x; | |
600 pln = search0(ego, p, slvndx, flagsp); | |
601 if (pln) break; | |
602 } | |
603 } | |
604 | |
605 if (!pln) { | |
606 /* search [L_ORIG, U] */ | |
607 if (l_orig != last_x) { | |
608 last_x = l_orig; | |
609 flagsp->l = l_orig; | |
610 pln = search0(ego, p, slvndx, flagsp); | |
611 } | |
612 } | |
613 | |
614 return pln; | |
615 } | |
616 | |
617 #define CHECK_FOR_BOGOSITY \ | |
618 if ((ego->bogosity_hook ? \ | |
619 (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \ | |
620 : ego->wisdom_state) == WISDOM_IS_BOGUS) \ | |
621 goto wisdom_is_bogus; | |
622 | |
623 static plan *mkplan(planner *ego, const problem *p) | |
624 { | |
625 plan *pln; | |
626 md5 m; | |
627 unsigned slvndx; | |
628 flags_t flags_of_solution; | |
629 solution *sol; | |
630 solver *s; | |
631 | |
632 ASSERT_ALIGNED_DOUBLE; | |
633 A(LEQ(PLNR_L(ego), PLNR_U(ego))); | |
634 | |
635 if (ESTIMATEP(ego)) | |
636 PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */ | |
637 | |
638 | |
639 #ifdef FFTW_DEBUG | |
640 check(&ego->htab_blessed); | |
641 check(&ego->htab_unblessed); | |
642 #endif | |
643 | |
644 pln = 0; | |
645 | |
646 CHECK_FOR_BOGOSITY; | |
647 | |
648 ego->timed_out = 0; | |
649 | |
650 ++ego->nprob; | |
651 md5hash(&m, p, ego); | |
652 | |
653 flags_of_solution = ego->flags; | |
654 | |
655 if (ego->wisdom_state != WISDOM_IGNORE_ALL) { | |
656 if ((sol = hlookup(ego, m.s, &flags_of_solution))) { | |
657 /* wisdom is acceptable */ | |
658 wisdom_state_t owisdom_state = ego->wisdom_state; | |
659 | |
660 /* this hook is mainly for MPI, to make sure that | |
661 wisdom is in sync across all processes for MPI problems */ | |
662 if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags)) | |
663 goto do_search; /* ignore not-ok wisdom */ | |
664 | |
665 slvndx = SLVNDX(sol); | |
666 | |
667 if (slvndx == INFEASIBLE_SLVNDX) { | |
668 if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE) | |
669 goto do_search; | |
670 else | |
671 return 0; /* known to be infeasible */ | |
672 } | |
673 | |
674 flags_of_solution = sol->flags; | |
675 | |
676 /* inherit blessing either from wisdom | |
677 or from the planner */ | |
678 flags_of_solution.hash_info |= BLISS(ego->flags); | |
679 | |
680 ego->wisdom_state = WISDOM_ONLY; | |
681 | |
682 s = ego->slvdescs[slvndx].slv; | |
683 if (p->adt->problem_kind != s->adt->problem_kind) | |
684 goto wisdom_is_bogus; | |
685 | |
686 pln = invoke_solver(ego, p, s, &flags_of_solution); | |
687 | |
688 CHECK_FOR_BOGOSITY; /* catch error in child solvers */ | |
689 | |
690 sol = 0; /* Paranoia: SOL may be dangling after | |
691 invoke_solver(); make sure we don't accidentally | |
692 reuse it. */ | |
693 | |
694 if (!pln) | |
695 goto wisdom_is_bogus; | |
696 | |
697 ego->wisdom_state = owisdom_state; | |
698 | |
699 goto skip_search; | |
700 } | |
701 else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */ | |
702 ego->nowisdom_hook(p); /* is in sync across all processes */ | |
703 } | |
704 | |
705 do_search: | |
706 /* cannot search in WISDOM_ONLY mode */ | |
707 if (ego->wisdom_state == WISDOM_ONLY) | |
708 goto wisdom_is_bogus; | |
709 | |
710 flags_of_solution = ego->flags; | |
711 pln = search(ego, p, &slvndx, &flags_of_solution); | |
712 CHECK_FOR_BOGOSITY; /* catch error in child solvers */ | |
713 | |
714 if (ego->timed_out) { | |
715 A(!pln); | |
716 if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) { | |
717 /* record (below) that this plan has failed because of | |
718 timeout */ | |
719 flags_of_solution.hash_info |= BLESSING; | |
720 } else { | |
721 /* this is not the top-level problem or timeout is not | |
722 active: record no wisdom. */ | |
723 return 0; | |
724 } | |
725 } else { | |
726 /* canonicalize to infinite timeout */ | |
727 flags_of_solution.timelimit_impatience = 0; | |
728 } | |
729 | |
730 skip_search: | |
731 if (ego->wisdom_state == WISDOM_NORMAL || | |
732 ego->wisdom_state == WISDOM_ONLY) { | |
733 if (pln) { | |
734 hinsert(ego, m.s, &flags_of_solution, slvndx); | |
735 invoke_hook(ego, pln, p, 1); | |
736 } else { | |
737 hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX); | |
738 } | |
739 } | |
740 | |
741 return pln; | |
742 | |
743 wisdom_is_bogus: | |
744 X(plan_destroy_internal)(pln); | |
745 ego->wisdom_state = WISDOM_IS_BOGUS; | |
746 return 0; | |
747 } | |
748 | |
749 static void htab_destroy(hashtab *ht) | |
750 { | |
751 X(ifree)(ht->solutions); | |
752 ht->solutions = 0; | |
753 ht->nelem = 0U; | |
754 } | |
755 | |
756 static void mkhashtab(hashtab *ht) | |
757 { | |
758 ht->nrehash = 0; | |
759 ht->succ_lookup = ht->lookup = ht->lookup_iter = 0; | |
760 ht->insert = ht->insert_iter = ht->insert_unknown = 0; | |
761 | |
762 ht->solutions = 0; | |
763 ht->hashsiz = ht->nelem = 0U; | |
764 hgrow(ht); /* so that hashsiz > 0 */ | |
765 } | |
766 | |
767 /* destroy hash table entries. If FORGET_EVERYTHING, destroy the whole | |
768 table. If FORGET_ACCURSED, then destroy entries that are not blessed. */ | |
769 static void forget(planner *ego, amnesia a) | |
770 { | |
771 switch (a) { | |
772 case FORGET_EVERYTHING: | |
773 htab_destroy(&ego->htab_blessed); | |
774 mkhashtab(&ego->htab_blessed); | |
775 /* fall through */ | |
776 case FORGET_ACCURSED: | |
777 htab_destroy(&ego->htab_unblessed); | |
778 mkhashtab(&ego->htab_unblessed); | |
779 break; | |
780 default: | |
781 break; | |
782 } | |
783 } | |
784 | |
785 /* FIXME: what sort of version information should we write? */ | |
786 #define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom)) | |
787 static const char stimeout[] = "TIMEOUT"; | |
788 | |
789 /* tantus labor non sit cassus */ | |
790 static void exprt(planner *ego, printer *p) | |
791 { | |
792 unsigned h; | |
793 hashtab *ht = &ego->htab_blessed; | |
794 md5 m; | |
795 | |
796 signature_of_configuration(&m, ego); | |
797 | |
798 p->print(p, | |
799 "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", | |
800 m.s[0], m.s[1], m.s[2], m.s[3]); | |
801 | |
802 for (h = 0; h < ht->hashsiz; ++h) { | |
803 solution *l = ht->solutions + h; | |
804 if (LIVEP(l)) { | |
805 const char *reg_nam; | |
806 int reg_id; | |
807 | |
808 if (SLVNDX(l) == INFEASIBLE_SLVNDX) { | |
809 reg_nam = stimeout; | |
810 reg_id = 0; | |
811 } else { | |
812 slvdesc *sp = ego->slvdescs + SLVNDX(l); | |
813 reg_nam = sp->reg_nam; | |
814 reg_id = sp->reg_id; | |
815 } | |
816 | |
817 /* qui salvandos salvas gratis | |
818 salva me fons pietatis */ | |
819 p->print(p, " (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n", | |
820 reg_nam, reg_id, | |
821 l->flags.l, l->flags.u, l->flags.timelimit_impatience, | |
822 l->s[0], l->s[1], l->s[2], l->s[3]); | |
823 } | |
824 } | |
825 p->print(p, ")\n"); | |
826 } | |
827 | |
828 /* mors stupebit et natura | |
829 cum resurget creatura */ | |
830 static int imprt(planner *ego, scanner *sc) | |
831 { | |
832 char buf[MAXNAM + 1]; | |
833 md5uint sig[4]; | |
834 unsigned l, u, timelimit_impatience; | |
835 flags_t flags; | |
836 int reg_id; | |
837 unsigned slvndx; | |
838 hashtab *ht = &ego->htab_blessed; | |
839 hashtab old; | |
840 md5 m; | |
841 | |
842 if (!sc->scan(sc, | |
843 "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", | |
844 sig + 0, sig + 1, sig + 2, sig + 3)) | |
845 return 0; /* don't need to restore hashtable */ | |
846 | |
847 signature_of_configuration(&m, ego); | |
848 if (m.s[0] != sig[0] || m.s[1] != sig[1] || | |
849 m.s[2] != sig[2] || m.s[3] != sig[3]) { | |
850 /* invalid configuration */ | |
851 return 0; | |
852 } | |
853 | |
854 /* make a backup copy of the hash table (cache the hash) */ | |
855 { | |
856 unsigned h, hsiz = ht->hashsiz; | |
857 old = *ht; | |
858 old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT); | |
859 for (h = 0; h < hsiz; ++h) | |
860 old.solutions[h] = ht->solutions[h]; | |
861 } | |
862 | |
863 while (1) { | |
864 if (sc->scan(sc, ")")) | |
865 break; | |
866 | |
867 /* qua resurget ex favilla */ | |
868 if (!sc->scan(sc, "(%*s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)", | |
869 MAXNAM, buf, ®_id, &l, &u, &timelimit_impatience, | |
870 sig + 0, sig + 1, sig + 2, sig + 3)) | |
871 goto bad; | |
872 | |
873 if (!strcmp(buf, stimeout) && reg_id == 0) { | |
874 slvndx = INFEASIBLE_SLVNDX; | |
875 } else { | |
876 if (timelimit_impatience != 0) | |
877 goto bad; | |
878 | |
879 slvndx = slookup(ego, buf, reg_id); | |
880 if (slvndx == INFEASIBLE_SLVNDX) | |
881 goto bad; | |
882 } | |
883 | |
884 /* inter oves locum praesta */ | |
885 flags.l = l; | |
886 flags.u = u; | |
887 flags.timelimit_impatience = timelimit_impatience; | |
888 flags.hash_info = BLESSING; | |
889 | |
890 CK(flags.l == l); | |
891 CK(flags.u == u); | |
892 CK(flags.timelimit_impatience == timelimit_impatience); | |
893 | |
894 if (!hlookup(ego, sig, &flags)) | |
895 hinsert(ego, sig, &flags, slvndx); | |
896 } | |
897 | |
898 X(ifree0)(old.solutions); | |
899 return 1; | |
900 | |
901 bad: | |
902 /* ``The wisdom of FFTW must be above suspicion.'' */ | |
903 X(ifree0)(ht->solutions); | |
904 *ht = old; | |
905 return 0; | |
906 } | |
907 | |
908 /* | |
909 * create a planner | |
910 */ | |
911 planner *X(mkplanner)(void) | |
912 { | |
913 int i; | |
914 | |
915 static const planner_adt padt = { | |
916 register_solver, mkplan, forget, exprt, imprt | |
917 }; | |
918 | |
919 planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS); | |
920 | |
921 p->adt = &padt; | |
922 p->nplan = p->nprob = 0; | |
923 p->pcost = p->epcost = 0.0; | |
924 p->hook = 0; | |
925 p->cost_hook = 0; | |
926 p->wisdom_ok_hook = 0; | |
927 p->nowisdom_hook = 0; | |
928 p->bogosity_hook = 0; | |
929 p->cur_reg_nam = 0; | |
930 p->wisdom_state = WISDOM_NORMAL; | |
931 | |
932 p->slvdescs = 0; | |
933 p->nslvdesc = p->slvdescsiz = 0; | |
934 | |
935 p->flags.l = 0; | |
936 p->flags.u = 0; | |
937 p->flags.timelimit_impatience = 0; | |
938 p->flags.hash_info = 0; | |
939 p->nthr = 1; | |
940 p->need_timeout_check = 1; | |
941 p->timelimit = -1; | |
942 | |
943 mkhashtab(&p->htab_blessed); | |
944 mkhashtab(&p->htab_unblessed); | |
945 | |
946 for (i = 0; i < PROBLEM_LAST; ++i) | |
947 p->slvdescs_for_problem_kind[i] = -1; | |
948 | |
949 return p; | |
950 } | |
951 | |
952 void X(planner_destroy)(planner *ego) | |
953 { | |
954 /* destroy hash table */ | |
955 htab_destroy(&ego->htab_blessed); | |
956 htab_destroy(&ego->htab_unblessed); | |
957 | |
958 /* destroy solvdesc table */ | |
959 FORALL_SOLVERS(ego, s, sp, { | |
960 UNUSED(sp); | |
961 X(solver_destroy)(s); | |
962 }); | |
963 | |
964 X(ifree0)(ego->slvdescs); | |
965 X(ifree)(ego); /* dona eis requiem */ | |
966 } | |
967 | |
968 plan *X(mkplan_d)(planner *ego, problem *p) | |
969 { | |
970 plan *pln = ego->adt->mkplan(ego, p); | |
971 X(problem_destroy)(p); | |
972 return pln; | |
973 } | |
974 | |
975 /* like X(mkplan_d), but sets/resets flags as well */ | |
976 plan *X(mkplan_f_d)(planner *ego, problem *p, | |
977 unsigned l_set, unsigned u_set, unsigned u_reset) | |
978 { | |
979 flags_t oflags = ego->flags; | |
980 plan *pln; | |
981 | |
982 PLNR_U(ego) &= ~u_reset; | |
983 PLNR_L(ego) &= ~u_reset; | |
984 PLNR_L(ego) |= l_set; | |
985 PLNR_U(ego) |= u_set | l_set; | |
986 pln = X(mkplan_d)(ego, p); | |
987 ego->flags = oflags; | |
988 return pln; | |
989 } | |
990 | |
991 /* | |
992 * Debugging code: | |
993 */ | |
994 #ifdef FFTW_DEBUG | |
995 static void check(hashtab *ht) | |
996 { | |
997 unsigned live = 0; | |
998 unsigned i; | |
999 | |
1000 A(ht->nelem < ht->hashsiz); | |
1001 | |
1002 for (i = 0; i < ht->hashsiz; ++i) { | |
1003 solution *l = ht->solutions + i; | |
1004 if (LIVEP(l)) | |
1005 ++live; | |
1006 } | |
1007 | |
1008 A(ht->nelem == live); | |
1009 | |
1010 for (i = 0; i < ht->hashsiz; ++i) { | |
1011 solution *l1 = ht->solutions + i; | |
1012 int foundit = 0; | |
1013 if (LIVEP(l1)) { | |
1014 unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s); | |
1015 | |
1016 g = h; | |
1017 do { | |
1018 solution *l = ht->solutions + g; | |
1019 if (VALIDP(l)) { | |
1020 if (l1 == l) | |
1021 foundit = 1; | |
1022 else if (LIVEP(l) && md5eq(l1->s, l->s)) { | |
1023 A(!subsumes(&l->flags, SLVNDX(l), &l1->flags)); | |
1024 A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags)); | |
1025 } | |
1026 } else | |
1027 break; | |
1028 g = addmod(g, d, ht->hashsiz); | |
1029 } while (g != h); | |
1030 | |
1031 A(foundit); | |
1032 } | |
1033 } | |
1034 } | |
1035 #endif |