Mercurial > hg > classical
diff import/Import.cpp @ 10:d35e5d769c87 classical-rdf
* some experiments with composer name matching
| field | value |
|---|---|
| author | Chris Cannam |
| date | Wed, 17 Feb 2010 19:26:48 +0000 |
| parents | df999875c53b |
| children | c8ef23d3888c |
line wrap: on
line diff
```diff
--- a/import/Import.cpp Fri Feb 12 16:56:29 2010 +0000
+++ b/import/Import.cpp Wed Feb 17 19:26:48 2010 +0000
@@ -143,76 +143,6 @@
     c->addAlias(nr);
 }
 
-QString makeNameKey(QString name)
-{
-    QString key = name.toLower()
-        .replace("'", "")
-        .replace("x", "ks")
-        .replace("y", "i")
-        .replace("k", "c")
-        .replace("ch", "c")
-        .replace("cc", "c")
-        .replace("v", "f")
-        .replace("ff", "f")
-        .replace("th", "t")
-        .replace("tch", "ch")
-        .replace("er", "r");
-// DEBUG << "makeNameKey(" << name << "): " << key << endl;
-    return key;
-}
-
-bool namesFuzzyMatch(QString an, Composer *b)
-{
-    // ew!
-
-    QString bn = b->name();
-    if (bn == an) return true;
-    if (b->aliases().contains(an)) return true;
-    int aSurnameIndex = 0, bSurnameIndex = 0;
-    if (an.contains(",")) {
-        an.replace(",", "");
-    } else {
-        aSurnameIndex = -1;
-    }
-    if (bn.contains(",")) {
-        bn.replace(",", "");
-    } else {
-        bSurnameIndex = -1;
-    }
-    QStringList nl = an.split(QRegExp("[ -]"));
-    QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]"));
-    int matchCount = 0;
-    QString surnameMatch = "";
-    if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
-    if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
-    if (nl[aSurnameIndex][0].isUpper() &&
-        nl[aSurnameIndex] != "Della" &&
-        makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
-        surnameMatch = nl[aSurnameIndex];
-    }
-    int tested = 0;
-    foreach (QString elt, nl) {
-        if (!elt[0].isUpper() || elt == "Della") continue;
-        QString k = makeNameKey(elt);
-        if (bnl.contains(k)) {
-            ++matchCount;
-        }
-        if (++tested == 2 && matchCount == 0) {
-            return false;
-        }
-    }
-    if (surnameMatch != "") {
-        DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
-        if (matchCount > 1) {
-            return true;
-        } else {
-            DEBUG << "(but not enough else matched)" << endl;
-            return false;
-        }
-    }
-    return false;
-}
-
 bool
 hasBetterName(Composer *c, Composer *other)
 {
@@ -264,21 +194,21 @@
     QSet<Composer *> matches;
 
     foreach (QString candidateName, allNames) {
-        QString key = makeNameKey(candidateName);
+        QString key = Composer::reduceName(candidateName);
         if (composers.contains(key)) {
             foreach (Composer *candidate, composers[key]) {
                 if (candidateName == dates) {
                     if (c->name() == candidate->name()) {
                         DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl;
-                    } else if (!namesFuzzyMatch(c->name(), candidate) &&
-                               !namesFuzzyMatch(candidate->name(), c)) {
+                    } else if (!candidate->matchCatalogueName(c->name()) &&
+                               !c->matchCatalogueName(candidate->name())) {
                         DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
                         continue;
                     } else {
                         DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
                     }
                 } else {
-                    if (!c->datesMatch(candidate)) {
+                    if (!c->matchDates(candidate)) {
                         DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
                         continue;
                     }
@@ -297,7 +227,7 @@
         for (ComposerMap::iterator i = composers.begin();
              i != composers.end(); ++i) {
             foreach (Composer *candidate, *i) {
-                if (namesFuzzyMatch(c->name(), candidate)) {
+                if (candidate->matchCatalogueName(c->name())) {
                     DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
                     matches.insert(candidate);
                     break;
@@ -309,7 +239,7 @@
 
     if (matches.empty()) {
         foreach (QString candidateName, allNames) {
-            QString key = makeNameKey(candidateName);
+            QString key = Composer::reduceName(candidateName);
             composers[key].insert(c);
             DEBUG << "added for alias or date " << candidateName << endl;
         }
@@ -331,14 +261,14 @@
         } else {
             other->addAlias(c->name());
         }
-        composers[makeNameKey(c->name())].insert(other);
+        composers[Composer::reduceName(c->name())].insert(other);
         DEBUG << "linking from alias " << c->name() << endl;
 
         foreach (QString alias, c->aliases()) {
             if (alias != other->name() &&
                 !other->aliases().contains(alias)) {
                 other->addAlias(alias);
-                composers[makeNameKey(alias)].insert(other);
+                composers[Composer::reduceName(alias)].insert(other);
                 DEBUG << "linking from alias " << alias << endl;
             }
         }
@@ -642,7 +572,7 @@
         if (!cn) continue;
         if (!cn->composer()) {
             QString cname = cn->composerName();
-            QString key = makeNameKey(cname);
+            QString key = Composer::reduceName(cname);
             if (cname != "") {
                 if (!composers.contains(key)) {
                     DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
```