cannam@167: /* cannam@167: * Copyright (c) 2000 Matteo Frigo cannam@167: * Copyright (c) 2000 Massachusetts Institute of Technology cannam@167: * cannam@167: * This program is free software; you can redistribute it and/or modify cannam@167: * it under the terms of the GNU General Public License as published by cannam@167: * the Free Software Foundation; either version 2 of the License, or cannam@167: * (at your option) any later version. cannam@167: * cannam@167: * This program is distributed in the hope that it will be useful, cannam@167: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@167: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@167: * GNU General Public License for more details. cannam@167: * cannam@167: * You should have received a copy of the GNU General Public License cannam@167: * along with this program; if not, write to the Free Software cannam@167: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@167: * cannam@167: */ cannam@167: cannam@167: #include "kernel/ifftw.h" cannam@167: #include cannam@167: cannam@167: /* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU cannam@167: program in English, because English is the one language that nearly cannam@167: all programmers in all countries can read." cannam@167: cannam@167: ingemisco tanquam reus cannam@167: culpa rubet vultus meus cannam@167: supplicanti parce [rms] cannam@167: */ cannam@167: cannam@167: #define VALIDP(solution) ((solution)->flags.hash_info & H_VALID) cannam@167: #define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE) cannam@167: #define SLVNDX(solution) ((solution)->flags.slvndx) cannam@167: #define BLISS(flags) (((flags).hash_info) & BLESSING) cannam@167: #define INFEASIBLE_SLVNDX ((1U<timelimit_impatience == 0); cannam@167: return (LEQ(a->u, b->u) && LEQ(b->l, a->l)); cannam@167: } else { cannam@167: return (LEQ(a->l, b->l) cannam@167: && a->timelimit_impatience <= b->timelimit_impatience); cannam@167: } cannam@167: } cannam@167: cannam@167: static unsigned addmod(unsigned a, unsigned b, unsigned p) cannam@167: { cannam@167: /* gcc-2.95/sparc produces incorrect code for the fast version below. */ cannam@167: #if defined(__sparc__) && defined(__GNUC__) cannam@167: /* slow version */ cannam@167: return (a + b) % p; cannam@167: #else cannam@167: /* faster version */ cannam@167: unsigned c = a + b; cannam@167: return c >= p ? c - p : c; cannam@167: #endif cannam@167: } cannam@167: cannam@167: /* cannam@167: slvdesc management: cannam@167: */ cannam@167: static void sgrow(planner *ego) cannam@167: { cannam@167: unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4; cannam@167: slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS); cannam@167: slvdesc *otab = ego->slvdescs; cannam@167: unsigned i; cannam@167: cannam@167: ego->slvdescs = ntab; cannam@167: ego->slvdescsiz = nsiz; cannam@167: for (i = 0; i < osiz; ++i) cannam@167: ntab[i] = otab[i]; cannam@167: X(ifree0)(otab); cannam@167: } cannam@167: cannam@167: static void register_solver(planner *ego, solver *s) cannam@167: { cannam@167: slvdesc *n; cannam@167: int kind; cannam@167: cannam@167: if (s) { /* add s to solver list */ cannam@167: X(solver_use)(s); cannam@167: cannam@167: A(ego->nslvdesc < INFEASIBLE_SLVNDX); cannam@167: if (ego->nslvdesc >= ego->slvdescsiz) cannam@167: sgrow(ego); cannam@167: cannam@167: n = ego->slvdescs + ego->nslvdesc; cannam@167: cannam@167: n->slv = s; cannam@167: n->reg_nam = ego->cur_reg_nam; cannam@167: n->reg_id = ego->cur_reg_id++; cannam@167: cannam@167: A(strlen(n->reg_nam) < MAXNAM); cannam@167: n->nam_hash = X(hash)(n->reg_nam); cannam@167: cannam@167: kind = s->adt->problem_kind; cannam@167: n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind]; cannam@167: ego->slvdescs_for_problem_kind[kind] = (int)/*from unsigned*/ego->nslvdesc; cannam@167: cannam@167: ego->nslvdesc++; cannam@167: } cannam@167: } cannam@167: cannam@167: static unsigned slookup(planner *ego, char *nam, int id) cannam@167: { cannam@167: unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */ cannam@167: FORALL_SOLVERS(ego, s, sp, { cannam@167: UNUSED(s); cannam@167: if (sp->reg_id == id && sp->nam_hash == h cannam@167: && !strcmp(sp->reg_nam, nam)) cannam@167: return (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); cannam@167: }); cannam@167: return INFEASIBLE_SLVNDX; cannam@167: } cannam@167: cannam@167: /* Compute a MD5 hash of the configuration of the planner. cannam@167: We store it into the wisdom file to make absolutely sure that cannam@167: we are reading wisdom that is applicable */ cannam@167: static void signature_of_configuration(md5 *m, planner *ego) cannam@167: { cannam@167: X(md5begin)(m); cannam@167: X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ cannam@167: FORALL_SOLVERS(ego, s, sp, { cannam@167: UNUSED(s); cannam@167: X(md5int)(m, sp->reg_id); cannam@167: X(md5puts)(m, sp->reg_nam); cannam@167: }); cannam@167: X(md5end)(m); cannam@167: } cannam@167: cannam@167: /* cannam@167: md5-related stuff: cannam@167: */ cannam@167: cannam@167: /* first hash function */ cannam@167: static unsigned h1(const hashtab *ht, const md5sig s) cannam@167: { cannam@167: unsigned h = s[0] % ht->hashsiz; cannam@167: A(h == (s[0] % ht->hashsiz)); cannam@167: return h; cannam@167: } cannam@167: cannam@167: /* second hash function (for double hashing) */ cannam@167: static unsigned h2(const hashtab *ht, const md5sig s) cannam@167: { cannam@167: unsigned h = 1U + s[1] % (ht->hashsiz - 1); cannam@167: A(h == (1U + s[1] % (ht->hashsiz - 1))); cannam@167: return h; cannam@167: } cannam@167: cannam@167: static void md5hash(md5 *m, const problem *p, const planner *plnr) cannam@167: { cannam@167: X(md5begin)(m); cannam@167: X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ cannam@167: X(md5int)(m, plnr->nthr); cannam@167: p->adt->hash(p, m); cannam@167: X(md5end)(m); cannam@167: } cannam@167: cannam@167: static int md5eq(const md5sig a, const md5sig b) cannam@167: { cannam@167: return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; cannam@167: } cannam@167: cannam@167: static void sigcpy(const md5sig a, md5sig b) cannam@167: { cannam@167: b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; cannam@167: } cannam@167: cannam@167: /* cannam@167: memoization routines : cannam@167: */ cannam@167: cannam@167: /* cannam@167: liber scriptus proferetur cannam@167: in quo totum continetur cannam@167: unde mundus iudicetur cannam@167: */ cannam@167: struct solution_s { cannam@167: md5sig s; cannam@167: flags_t flags; cannam@167: }; cannam@167: cannam@167: static solution *htab_lookup(hashtab *ht, const md5sig s, cannam@167: const flags_t *flagsp) cannam@167: { cannam@167: unsigned g, h = h1(ht, s), d = h2(ht, s); cannam@167: solution *best = 0; cannam@167: cannam@167: ++ht->lookup; cannam@167: cannam@167: /* search all entries that match; select the one with cannam@167: the lowest flags.u */ cannam@167: /* This loop may potentially traverse the whole table, since at cannam@167: least one element is guaranteed to be !LIVEP, but all elements cannam@167: may be VALIDP. Hence, we stop after at the first invalid cannam@167: element or after traversing the whole table. */ cannam@167: g = h; cannam@167: do { cannam@167: solution *l = ht->solutions + g; cannam@167: ++ht->lookup_iter; cannam@167: if (VALIDP(l)) { cannam@167: if (LIVEP(l) cannam@167: && md5eq(s, l->s) cannam@167: && subsumes(&l->flags, SLVNDX(l), flagsp) ) { cannam@167: if (!best || LEQ(l->flags.u, best->flags.u)) cannam@167: best = l; cannam@167: } cannam@167: } else cannam@167: break; cannam@167: cannam@167: g = addmod(g, d, ht->hashsiz); cannam@167: } while (g != h); cannam@167: cannam@167: if (best) cannam@167: ++ht->succ_lookup; cannam@167: return best; cannam@167: } cannam@167: cannam@167: static solution *hlookup(planner *ego, const md5sig s, cannam@167: const flags_t *flagsp) cannam@167: { cannam@167: solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp); cannam@167: if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp); cannam@167: return sol; cannam@167: } cannam@167: cannam@167: static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp, cannam@167: unsigned slvndx, solution *slot) cannam@167: { cannam@167: ++ht->insert; cannam@167: ++ht->nelem; cannam@167: A(!LIVEP(slot)); cannam@167: slot->flags.u = flagsp->u; cannam@167: slot->flags.l = flagsp->l; cannam@167: slot->flags.timelimit_impatience = flagsp->timelimit_impatience; cannam@167: slot->flags.hash_info |= H_VALID | H_LIVE; cannam@167: SLVNDX(slot) = slvndx; cannam@167: cannam@167: /* keep this check enabled in case we add so many solvers cannam@167: that the bitfield overflows */ cannam@167: CK(SLVNDX(slot) == slvndx); cannam@167: sigcpy(s, slot->s); cannam@167: } cannam@167: cannam@167: static void kill_slot(hashtab *ht, solution *slot) cannam@167: { cannam@167: A(LIVEP(slot)); /* ==> */ A(VALIDP(slot)); cannam@167: cannam@167: --ht->nelem; cannam@167: slot->flags.hash_info = H_VALID; cannam@167: } cannam@167: cannam@167: static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp, cannam@167: unsigned slvndx) cannam@167: { cannam@167: solution *l; cannam@167: unsigned g, h = h1(ht, s), d = h2(ht, s); cannam@167: cannam@167: ++ht->insert_unknown; cannam@167: cannam@167: /* search for nonfull slot */ cannam@167: for (g = h; ; g = addmod(g, d, ht->hashsiz)) { cannam@167: ++ht->insert_iter; cannam@167: l = ht->solutions + g; cannam@167: if (!LIVEP(l)) break; cannam@167: A((g + d) % ht->hashsiz != h); cannam@167: } cannam@167: cannam@167: fill_slot(ht, s, flagsp, slvndx, l); cannam@167: } cannam@167: cannam@167: static void rehash(hashtab *ht, unsigned nsiz) cannam@167: { cannam@167: unsigned osiz = ht->hashsiz, h; cannam@167: solution *osol = ht->solutions, *nsol; cannam@167: cannam@167: nsiz = (unsigned)X(next_prime)((INT)nsiz); cannam@167: nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT); cannam@167: ++ht->nrehash; cannam@167: cannam@167: /* init new table */ cannam@167: for (h = 0; h < nsiz; ++h) cannam@167: nsol[h].flags.hash_info = 0; cannam@167: cannam@167: /* install new table */ cannam@167: ht->hashsiz = nsiz; cannam@167: ht->solutions = nsol; cannam@167: ht->nelem = 0; cannam@167: cannam@167: /* copy table */ cannam@167: for (h = 0; h < osiz; ++h) { cannam@167: solution *l = osol + h; cannam@167: if (LIVEP(l)) cannam@167: hinsert0(ht, l->s, &l->flags, SLVNDX(l)); cannam@167: } cannam@167: cannam@167: X(ifree0)(osol); cannam@167: } cannam@167: cannam@167: static unsigned minsz(unsigned nelem) cannam@167: { cannam@167: return 1U + nelem + nelem / 8U; cannam@167: } cannam@167: cannam@167: static unsigned nextsz(unsigned nelem) cannam@167: { cannam@167: return minsz(minsz(nelem)); cannam@167: } cannam@167: cannam@167: static void hgrow(hashtab *ht) cannam@167: { cannam@167: unsigned nelem = ht->nelem; cannam@167: if (minsz(nelem) >= ht->hashsiz) cannam@167: rehash(ht, nextsz(nelem)); cannam@167: } cannam@167: cannam@167: #if 0 cannam@167: /* shrink the hash table, never used */ cannam@167: static void hshrink(hashtab *ht) cannam@167: { cannam@167: unsigned nelem = ht->nelem; cannam@167: /* always rehash after deletions */ cannam@167: rehash(ht, nextsz(nelem)); cannam@167: } cannam@167: #endif cannam@167: cannam@167: static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp, cannam@167: unsigned slvndx) cannam@167: { cannam@167: unsigned g, h = h1(ht, s), d = h2(ht, s); cannam@167: solution *first = 0; cannam@167: cannam@167: /* Remove all entries that are subsumed by the new one. */ cannam@167: /* This loop may potentially traverse the whole table, since at cannam@167: least one element is guaranteed to be !LIVEP, but all elements cannam@167: may be VALIDP. Hence, we stop after at the first invalid cannam@167: element or after traversing the whole table. */ cannam@167: g = h; cannam@167: do { cannam@167: solution *l = ht->solutions + g; cannam@167: ++ht->insert_iter; cannam@167: if (VALIDP(l)) { cannam@167: if (LIVEP(l) && md5eq(s, l->s)) { cannam@167: if (subsumes(flagsp, slvndx, &l->flags)) { cannam@167: if (!first) first = l; cannam@167: kill_slot(ht, l); cannam@167: } else { cannam@167: /* It is an error to insert an element that cannam@167: is subsumed by an existing entry. */ cannam@167: A(!subsumes(&l->flags, SLVNDX(l), flagsp)); cannam@167: } cannam@167: } cannam@167: } else cannam@167: break; cannam@167: cannam@167: g = addmod(g, d, ht->hashsiz); cannam@167: } while (g != h); cannam@167: cannam@167: if (first) { cannam@167: /* overwrite FIRST */ cannam@167: fill_slot(ht, s, flagsp, slvndx, first); cannam@167: } else { cannam@167: /* create a new entry */ cannam@167: hgrow(ht); cannam@167: hinsert0(ht, s, flagsp, slvndx); cannam@167: } cannam@167: } cannam@167: cannam@167: static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp, cannam@167: unsigned slvndx) cannam@167: { cannam@167: htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed, cannam@167: s, flagsp, slvndx ); cannam@167: } cannam@167: cannam@167: cannam@167: static void invoke_hook(planner *ego, plan *pln, const problem *p, cannam@167: int optimalp) cannam@167: { cannam@167: if (ego->hook) cannam@167: ego->hook(ego, pln, p, optimalp); cannam@167: } cannam@167: cannam@167: #ifdef FFTW_RANDOM_ESTIMATOR cannam@167: /* a "random" estimate, used for debugging to generate "random" cannam@167: plans, albeit from a deterministic seed. */ cannam@167: cannam@167: unsigned X(random_estimate_seed) = 0; cannam@167: cannam@167: static double random_estimate(const planner *ego, const plan *pln, cannam@167: const problem *p) cannam@167: { cannam@167: md5 m; cannam@167: X(md5begin)(&m); cannam@167: X(md5unsigned)(&m, X(random_estimate_seed)); cannam@167: X(md5int)(&m, ego->nthr); cannam@167: p->adt->hash(p, &m); cannam@167: X(md5putb)(&m, &pln->ops, sizeof(pln->ops)); cannam@167: X(md5putb)(&m, &pln->adt, sizeof(pln->adt)); cannam@167: X(md5end)(&m); cannam@167: return ego->cost_hook ? ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0]; cannam@167: } cannam@167: cannam@167: #endif cannam@167: cannam@167: double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p) cannam@167: { cannam@167: double cost = cannam@167: + pln->ops.add cannam@167: + pln->ops.mul cannam@167: cannam@167: #if HAVE_FMA cannam@167: + pln->ops.fma cannam@167: #else cannam@167: + 2 * pln->ops.fma cannam@167: #endif cannam@167: cannam@167: + pln->ops.other; cannam@167: if (ego->cost_hook) cannam@167: cost = ego->cost_hook(p, cost, COST_MAX); cannam@167: return cost; cannam@167: } cannam@167: cannam@167: static void evaluate_plan(planner *ego, plan *pln, const problem *p) cannam@167: { cannam@167: if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) { cannam@167: ego->nplan++; cannam@167: cannam@167: if (ESTIMATEP(ego)) { cannam@167: estimate: cannam@167: /* heuristic */ cannam@167: #ifdef FFTW_RANDOM_ESTIMATOR cannam@167: pln->pcost = random_estimate(ego, pln, p); cannam@167: ego->epcost += X(iestimate_cost)(ego, pln, p); cannam@167: #else cannam@167: pln->pcost = X(iestimate_cost)(ego, pln, p); cannam@167: ego->epcost += pln->pcost; cannam@167: #endif cannam@167: } else { cannam@167: double t = X(measure_execution_time)(ego, pln, p); cannam@167: cannam@167: if (t < 0) { /* unavailable cycle counter */ cannam@167: /* Real programmers can write FORTRAN in any language */ cannam@167: goto estimate; cannam@167: } cannam@167: cannam@167: pln->pcost = t; cannam@167: ego->pcost += t; cannam@167: ego->need_timeout_check = 1; cannam@167: } cannam@167: } cannam@167: cannam@167: invoke_hook(ego, pln, p, 0); cannam@167: } cannam@167: cannam@167: /* maintain dynamic scoping of flags, nthr: */ cannam@167: static plan *invoke_solver(planner *ego, const problem *p, solver *s, cannam@167: const flags_t *nflags) cannam@167: { cannam@167: flags_t flags = ego->flags; cannam@167: int nthr = ego->nthr; cannam@167: plan *pln; cannam@167: ego->flags = *nflags; cannam@167: PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; cannam@167: A(p->adt->problem_kind == s->adt->problem_kind); cannam@167: pln = s->adt->mkplan(s, p, ego); cannam@167: ego->nthr = nthr; cannam@167: ego->flags = flags; cannam@167: return pln; cannam@167: } cannam@167: cannam@167: /* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */ cannam@167: static int timeout_p(planner *ego, const problem *p) cannam@167: { cannam@167: /* do not timeout when estimating. First, the estimator is the cannam@167: planner of last resort. Second, calling X(elapsed_since)() is cannam@167: slower than estimating */ cannam@167: if (!ESTIMATEP(ego)) { cannam@167: /* do not assume that X(elapsed_since)() is monotonic */ cannam@167: if (ego->timed_out) { cannam@167: A(ego->need_timeout_check); cannam@167: return 1; cannam@167: } cannam@167: cannam@167: if (ego->timelimit >= 0 && cannam@167: X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) { cannam@167: ego->timed_out = 1; cannam@167: ego->need_timeout_check = 1; cannam@167: return 1; cannam@167: } cannam@167: } cannam@167: cannam@167: A(!ego->timed_out); cannam@167: ego->need_timeout_check = 0; cannam@167: return 0; cannam@167: } cannam@167: cannam@167: static plan *search0(planner *ego, const problem *p, unsigned *slvndx, cannam@167: const flags_t *flagsp) cannam@167: { cannam@167: plan *best = 0; cannam@167: int best_not_yet_timed = 1; cannam@167: cannam@167: /* Do not start a search if the planner timed out. This check is cannam@167: necessary, lest the relaxation mechanism kick in */ cannam@167: if (timeout_p(ego, p)) cannam@167: return 0; cannam@167: cannam@167: FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, { cannam@167: plan *pln; cannam@167: cannam@167: pln = invoke_solver(ego, p, s, flagsp); cannam@167: cannam@167: if (ego->need_timeout_check) cannam@167: if (timeout_p(ego, p)) { cannam@167: X(plan_destroy_internal)(pln); cannam@167: X(plan_destroy_internal)(best); cannam@167: return 0; cannam@167: } cannam@167: cannam@167: if (pln) { cannam@167: /* read COULD_PRUNE_NOW_P because PLN may be destroyed cannam@167: before we use COULD_PRUNE_NOW_P */ cannam@167: int could_prune_now_p = pln->could_prune_now_p; cannam@167: cannam@167: if (best) { cannam@167: if (best_not_yet_timed) { cannam@167: evaluate_plan(ego, best, p); cannam@167: best_not_yet_timed = 0; cannam@167: } cannam@167: evaluate_plan(ego, pln, p); cannam@167: if (pln->pcost < best->pcost) { cannam@167: X(plan_destroy_internal)(best); cannam@167: best = pln; cannam@167: *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); cannam@167: } else { cannam@167: X(plan_destroy_internal)(pln); cannam@167: } cannam@167: } else { cannam@167: best = pln; cannam@167: *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); cannam@167: } cannam@167: cannam@167: if (ALLOW_PRUNINGP(ego) && could_prune_now_p) cannam@167: break; cannam@167: } cannam@167: }); cannam@167: cannam@167: return best; cannam@167: } cannam@167: cannam@167: static plan *search(planner *ego, const problem *p, unsigned *slvndx, cannam@167: flags_t *flagsp) cannam@167: { cannam@167: plan *pln = 0; cannam@167: unsigned i; cannam@167: cannam@167: /* relax impatience in this order: */ cannam@167: static const unsigned relax_tab[] = { cannam@167: 0, /* relax nothing */ cannam@167: NO_VRECURSE, cannam@167: NO_FIXED_RADIX_LARGE_N, cannam@167: NO_SLOW, cannam@167: NO_UGLY cannam@167: }; cannam@167: cannam@167: unsigned l_orig = flagsp->l; cannam@167: unsigned x = flagsp->u; cannam@167: cannam@167: /* guaranteed to be different from X */ cannam@167: unsigned last_x = ~x; cannam@167: cannam@167: for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) { cannam@167: if (LEQ(l_orig, x & ~relax_tab[i])) cannam@167: x = x & ~relax_tab[i]; cannam@167: cannam@167: if (x != last_x) { cannam@167: last_x = x; cannam@167: flagsp->l = x; cannam@167: pln = search0(ego, p, slvndx, flagsp); cannam@167: if (pln) break; cannam@167: } cannam@167: } cannam@167: cannam@167: if (!pln) { cannam@167: /* search [L_ORIG, U] */ cannam@167: if (l_orig != last_x) { cannam@167: last_x = l_orig; cannam@167: flagsp->l = l_orig; cannam@167: pln = search0(ego, p, slvndx, flagsp); cannam@167: } cannam@167: } cannam@167: cannam@167: return pln; cannam@167: } cannam@167: cannam@167: #define CHECK_FOR_BOGOSITY \ cannam@167: if ((ego->bogosity_hook ? \ cannam@167: (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \ cannam@167: : ego->wisdom_state) == WISDOM_IS_BOGUS) \ cannam@167: goto wisdom_is_bogus; cannam@167: cannam@167: static plan *mkplan(planner *ego, const problem *p) cannam@167: { cannam@167: plan *pln; cannam@167: md5 m; cannam@167: unsigned slvndx; cannam@167: flags_t flags_of_solution; cannam@167: solution *sol; cannam@167: solver *s; cannam@167: cannam@167: ASSERT_ALIGNED_DOUBLE; cannam@167: A(LEQ(PLNR_L(ego), PLNR_U(ego))); cannam@167: cannam@167: if (ESTIMATEP(ego)) cannam@167: PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */ cannam@167: cannam@167: cannam@167: #ifdef FFTW_DEBUG cannam@167: check(&ego->htab_blessed); cannam@167: check(&ego->htab_unblessed); cannam@167: #endif cannam@167: cannam@167: pln = 0; cannam@167: cannam@167: CHECK_FOR_BOGOSITY; cannam@167: cannam@167: ego->timed_out = 0; cannam@167: cannam@167: ++ego->nprob; cannam@167: md5hash(&m, p, ego); cannam@167: cannam@167: flags_of_solution = ego->flags; cannam@167: cannam@167: if (ego->wisdom_state != WISDOM_IGNORE_ALL) { cannam@167: if ((sol = hlookup(ego, m.s, &flags_of_solution))) { cannam@167: /* wisdom is acceptable */ cannam@167: wisdom_state_t owisdom_state = ego->wisdom_state; cannam@167: cannam@167: /* this hook is mainly for MPI, to make sure that cannam@167: wisdom is in sync across all processes for MPI problems */ cannam@167: if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags)) cannam@167: goto do_search; /* ignore not-ok wisdom */ cannam@167: cannam@167: slvndx = SLVNDX(sol); cannam@167: cannam@167: if (slvndx == INFEASIBLE_SLVNDX) { cannam@167: if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE) cannam@167: goto do_search; cannam@167: else cannam@167: return 0; /* known to be infeasible */ cannam@167: } cannam@167: cannam@167: flags_of_solution = sol->flags; cannam@167: cannam@167: /* inherit blessing either from wisdom cannam@167: or from the planner */ cannam@167: flags_of_solution.hash_info |= BLISS(ego->flags); cannam@167: cannam@167: ego->wisdom_state = WISDOM_ONLY; cannam@167: cannam@167: s = ego->slvdescs[slvndx].slv; cannam@167: if (p->adt->problem_kind != s->adt->problem_kind) cannam@167: goto wisdom_is_bogus; cannam@167: cannam@167: pln = invoke_solver(ego, p, s, &flags_of_solution); cannam@167: cannam@167: CHECK_FOR_BOGOSITY; /* catch error in child solvers */ cannam@167: cannam@167: sol = 0; /* Paranoia: SOL may be dangling after cannam@167: invoke_solver(); make sure we don't accidentally cannam@167: reuse it. */ cannam@167: cannam@167: if (!pln) cannam@167: goto wisdom_is_bogus; cannam@167: cannam@167: ego->wisdom_state = owisdom_state; cannam@167: cannam@167: goto skip_search; cannam@167: } cannam@167: else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */ cannam@167: ego->nowisdom_hook(p); /* is in sync across all processes */ cannam@167: } cannam@167: cannam@167: do_search: cannam@167: /* cannot search in WISDOM_ONLY mode */ cannam@167: if (ego->wisdom_state == WISDOM_ONLY) cannam@167: goto wisdom_is_bogus; cannam@167: cannam@167: flags_of_solution = ego->flags; cannam@167: pln = search(ego, p, &slvndx, &flags_of_solution); cannam@167: CHECK_FOR_BOGOSITY; /* catch error in child solvers */ cannam@167: cannam@167: if (ego->timed_out) { cannam@167: A(!pln); cannam@167: if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) { cannam@167: /* record (below) that this plan has failed because of cannam@167: timeout */ cannam@167: flags_of_solution.hash_info |= BLESSING; cannam@167: } else { cannam@167: /* this is not the top-level problem or timeout is not cannam@167: active: record no wisdom. */ cannam@167: return 0; cannam@167: } cannam@167: } else { cannam@167: /* canonicalize to infinite timeout */ cannam@167: flags_of_solution.timelimit_impatience = 0; cannam@167: } cannam@167: cannam@167: skip_search: cannam@167: if (ego->wisdom_state == WISDOM_NORMAL || cannam@167: ego->wisdom_state == WISDOM_ONLY) { cannam@167: if (pln) { cannam@167: hinsert(ego, m.s, &flags_of_solution, slvndx); cannam@167: invoke_hook(ego, pln, p, 1); cannam@167: } else { cannam@167: hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX); cannam@167: } cannam@167: } cannam@167: cannam@167: return pln; cannam@167: cannam@167: wisdom_is_bogus: cannam@167: X(plan_destroy_internal)(pln); cannam@167: ego->wisdom_state = WISDOM_IS_BOGUS; cannam@167: return 0; cannam@167: } cannam@167: cannam@167: static void htab_destroy(hashtab *ht) cannam@167: { cannam@167: X(ifree)(ht->solutions); cannam@167: ht->solutions = 0; cannam@167: ht->nelem = 0U; cannam@167: } cannam@167: cannam@167: static void mkhashtab(hashtab *ht) cannam@167: { cannam@167: ht->nrehash = 0; cannam@167: ht->succ_lookup = ht->lookup = ht->lookup_iter = 0; cannam@167: ht->insert = ht->insert_iter = ht->insert_unknown = 0; cannam@167: cannam@167: ht->solutions = 0; cannam@167: ht->hashsiz = ht->nelem = 0U; cannam@167: hgrow(ht); /* so that hashsiz > 0 */ cannam@167: } cannam@167: cannam@167: /* destroy hash table entries. If FORGET_EVERYTHING, destroy the whole cannam@167: table. If FORGET_ACCURSED, then destroy entries that are not blessed. */ cannam@167: static void forget(planner *ego, amnesia a) cannam@167: { cannam@167: switch (a) { cannam@167: case FORGET_EVERYTHING: cannam@167: htab_destroy(&ego->htab_blessed); cannam@167: mkhashtab(&ego->htab_blessed); cannam@167: /* fall through */ cannam@167: case FORGET_ACCURSED: cannam@167: htab_destroy(&ego->htab_unblessed); cannam@167: mkhashtab(&ego->htab_unblessed); cannam@167: break; cannam@167: default: cannam@167: break; cannam@167: } cannam@167: } cannam@167: cannam@167: /* FIXME: what sort of version information should we write? */ cannam@167: #define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom)) cannam@167: static const char stimeout[] = "TIMEOUT"; cannam@167: cannam@167: /* tantus labor non sit cassus */ cannam@167: static void exprt(planner *ego, printer *p) cannam@167: { cannam@167: unsigned h; cannam@167: hashtab *ht = &ego->htab_blessed; cannam@167: md5 m; cannam@167: cannam@167: signature_of_configuration(&m, ego); cannam@167: cannam@167: p->print(p, cannam@167: "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", cannam@167: m.s[0], m.s[1], m.s[2], m.s[3]); cannam@167: cannam@167: for (h = 0; h < ht->hashsiz; ++h) { cannam@167: solution *l = ht->solutions + h; cannam@167: if (LIVEP(l)) { cannam@167: const char *reg_nam; cannam@167: int reg_id; cannam@167: cannam@167: if (SLVNDX(l) == INFEASIBLE_SLVNDX) { cannam@167: reg_nam = stimeout; cannam@167: reg_id = 0; cannam@167: } else { cannam@167: slvdesc *sp = ego->slvdescs + SLVNDX(l); cannam@167: reg_nam = sp->reg_nam; cannam@167: reg_id = sp->reg_id; cannam@167: } cannam@167: cannam@167: /* qui salvandos salvas gratis cannam@167: salva me fons pietatis */ cannam@167: p->print(p, " (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n", cannam@167: reg_nam, reg_id, cannam@167: l->flags.l, l->flags.u, l->flags.timelimit_impatience, cannam@167: l->s[0], l->s[1], l->s[2], l->s[3]); cannam@167: } cannam@167: } cannam@167: p->print(p, ")\n"); cannam@167: } cannam@167: cannam@167: /* mors stupebit et natura cannam@167: cum resurget creatura */ cannam@167: static int imprt(planner *ego, scanner *sc) cannam@167: { cannam@167: char buf[MAXNAM + 1]; cannam@167: md5uint sig[4]; cannam@167: unsigned l, u, timelimit_impatience; cannam@167: flags_t flags; cannam@167: int reg_id; cannam@167: unsigned slvndx; cannam@167: hashtab *ht = &ego->htab_blessed; cannam@167: hashtab old; cannam@167: md5 m; cannam@167: cannam@167: if (!sc->scan(sc, cannam@167: "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", cannam@167: sig + 0, sig + 1, sig + 2, sig + 3)) cannam@167: return 0; /* don't need to restore hashtable */ cannam@167: cannam@167: signature_of_configuration(&m, ego); cannam@167: if (m.s[0] != sig[0] || m.s[1] != sig[1] || cannam@167: m.s[2] != sig[2] || m.s[3] != sig[3]) { cannam@167: /* invalid configuration */ cannam@167: return 0; cannam@167: } cannam@167: cannam@167: /* make a backup copy of the hash table (cache the hash) */ cannam@167: { cannam@167: unsigned h, hsiz = ht->hashsiz; cannam@167: old = *ht; cannam@167: old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT); cannam@167: for (h = 0; h < hsiz; ++h) cannam@167: old.solutions[h] = ht->solutions[h]; cannam@167: } cannam@167: cannam@167: while (1) { cannam@167: if (sc->scan(sc, ")")) cannam@167: break; cannam@167: cannam@167: /* qua resurget ex favilla */ cannam@167: if (!sc->scan(sc, "(%*s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)", cannam@167: MAXNAM, buf, ®_id, &l, &u, &timelimit_impatience, cannam@167: sig + 0, sig + 1, sig + 2, sig + 3)) cannam@167: goto bad; cannam@167: cannam@167: if (!strcmp(buf, stimeout) && reg_id == 0) { cannam@167: slvndx = INFEASIBLE_SLVNDX; cannam@167: } else { cannam@167: if (timelimit_impatience != 0) cannam@167: goto bad; cannam@167: cannam@167: slvndx = slookup(ego, buf, reg_id); cannam@167: if (slvndx == INFEASIBLE_SLVNDX) cannam@167: goto bad; cannam@167: } cannam@167: cannam@167: /* inter oves locum praesta */ cannam@167: flags.l = l; cannam@167: flags.u = u; cannam@167: flags.timelimit_impatience = timelimit_impatience; cannam@167: flags.hash_info = BLESSING; cannam@167: cannam@167: CK(flags.l == l); cannam@167: CK(flags.u == u); cannam@167: CK(flags.timelimit_impatience == timelimit_impatience); cannam@167: cannam@167: if (!hlookup(ego, sig, &flags)) cannam@167: hinsert(ego, sig, &flags, slvndx); cannam@167: } cannam@167: cannam@167: X(ifree0)(old.solutions); cannam@167: return 1; cannam@167: cannam@167: bad: cannam@167: /* ``The wisdom of FFTW must be above suspicion.'' */ cannam@167: X(ifree0)(ht->solutions); cannam@167: *ht = old; cannam@167: return 0; cannam@167: } cannam@167: cannam@167: /* cannam@167: * create a planner cannam@167: */ cannam@167: planner *X(mkplanner)(void) cannam@167: { cannam@167: int i; cannam@167: cannam@167: static const planner_adt padt = { cannam@167: register_solver, mkplan, forget, exprt, imprt cannam@167: }; cannam@167: cannam@167: planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS); cannam@167: cannam@167: p->adt = &padt; cannam@167: p->nplan = p->nprob = 0; cannam@167: p->pcost = p->epcost = 0.0; cannam@167: p->hook = 0; cannam@167: p->cost_hook = 0; cannam@167: p->wisdom_ok_hook = 0; cannam@167: p->nowisdom_hook = 0; cannam@167: p->bogosity_hook = 0; cannam@167: p->cur_reg_nam = 0; cannam@167: p->wisdom_state = WISDOM_NORMAL; cannam@167: cannam@167: p->slvdescs = 0; cannam@167: p->nslvdesc = p->slvdescsiz = 0; cannam@167: cannam@167: p->flags.l = 0; cannam@167: p->flags.u = 0; cannam@167: p->flags.timelimit_impatience = 0; cannam@167: p->flags.hash_info = 0; cannam@167: p->nthr = 1; cannam@167: p->need_timeout_check = 1; cannam@167: p->timelimit = -1; cannam@167: cannam@167: mkhashtab(&p->htab_blessed); cannam@167: mkhashtab(&p->htab_unblessed); cannam@167: cannam@167: for (i = 0; i < PROBLEM_LAST; ++i) cannam@167: p->slvdescs_for_problem_kind[i] = -1; cannam@167: cannam@167: return p; cannam@167: } cannam@167: cannam@167: void X(planner_destroy)(planner *ego) cannam@167: { cannam@167: /* destroy hash table */ cannam@167: htab_destroy(&ego->htab_blessed); cannam@167: htab_destroy(&ego->htab_unblessed); cannam@167: cannam@167: /* destroy solvdesc table */ cannam@167: FORALL_SOLVERS(ego, s, sp, { cannam@167: UNUSED(sp); cannam@167: X(solver_destroy)(s); cannam@167: }); cannam@167: cannam@167: X(ifree0)(ego->slvdescs); cannam@167: X(ifree)(ego); /* dona eis requiem */ cannam@167: } cannam@167: cannam@167: plan *X(mkplan_d)(planner *ego, problem *p) cannam@167: { cannam@167: plan *pln = ego->adt->mkplan(ego, p); cannam@167: X(problem_destroy)(p); cannam@167: return pln; cannam@167: } cannam@167: cannam@167: /* like X(mkplan_d), but sets/resets flags as well */ cannam@167: plan *X(mkplan_f_d)(planner *ego, problem *p, cannam@167: unsigned l_set, unsigned u_set, unsigned u_reset) cannam@167: { cannam@167: flags_t oflags = ego->flags; cannam@167: plan *pln; cannam@167: cannam@167: PLNR_U(ego) &= ~u_reset; cannam@167: PLNR_L(ego) &= ~u_reset; cannam@167: PLNR_L(ego) |= l_set; cannam@167: PLNR_U(ego) |= u_set | l_set; cannam@167: pln = X(mkplan_d)(ego, p); cannam@167: ego->flags = oflags; cannam@167: return pln; cannam@167: } cannam@167: cannam@167: /* cannam@167: * Debugging code: cannam@167: */ cannam@167: #ifdef FFTW_DEBUG cannam@167: static void check(hashtab *ht) cannam@167: { cannam@167: unsigned live = 0; cannam@167: unsigned i; cannam@167: cannam@167: A(ht->nelem < ht->hashsiz); cannam@167: cannam@167: for (i = 0; i < ht->hashsiz; ++i) { cannam@167: solution *l = ht->solutions + i; cannam@167: if (LIVEP(l)) cannam@167: ++live; cannam@167: } cannam@167: cannam@167: A(ht->nelem == live); cannam@167: cannam@167: for (i = 0; i < ht->hashsiz; ++i) { cannam@167: solution *l1 = ht->solutions + i; cannam@167: int foundit = 0; cannam@167: if (LIVEP(l1)) { cannam@167: unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s); cannam@167: cannam@167: g = h; cannam@167: do { cannam@167: solution *l = ht->solutions + g; cannam@167: if (VALIDP(l)) { cannam@167: if (l1 == l) cannam@167: foundit = 1; cannam@167: else if (LIVEP(l) && md5eq(l1->s, l->s)) { cannam@167: A(!subsumes(&l->flags, SLVNDX(l), &l1->flags)); cannam@167: A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags)); cannam@167: } cannam@167: } else cannam@167: break; cannam@167: g = addmod(g, d, ht->hashsiz); cannam@167: } while (g != h); cannam@167: cannam@167: A(foundit); cannam@167: } cannam@167: } cannam@167: } cannam@167: #endif