Mercurial > hg > classical
changeset 5:d23a4c935a22 classical-rdf
* Update CMN and mbz mappings for new classical archives import
author | Chris Cannam |
---|---|
date | Fri, 11 Dec 2009 16:10:29 +0000 |
parents | 719a4f477098 |
children | 96bf272e74c5 |
files | import/Import.cpp import/ImportClassicalArchives.cpp |
diffstat | 2 files changed, 47 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/import/Import.cpp Thu Dec 10 15:15:40 2009 +0000 +++ b/import/Import.cpp Fri Dec 11 16:10:29 2009 +0000 @@ -141,6 +141,24 @@ c->addAlias(nr); } +QString makeNameKey(QString name) +{ + QString key = name.toLower() + .replace("'", "") + .replace("x", "ks") + .replace("y", "i") + .replace("k", "c") + .replace("ch", "c") + .replace("cc", "c") + .replace("v", "f") + .replace("ff", "f") + .replace("th", "t") + .replace("tch", "ch") + .replace("er", "r"); +// DEBUG << "makeNameKey(" << name << "): " << key << endl; + return key; +} + bool namesFuzzyMatch(QString an, Composer *b) { // ew! @@ -160,26 +178,35 @@ bSurnameIndex = -1; } QStringList nl = an.split(QRegExp("[ -]")); - QStringList bnl = bn.split(QRegExp("[ -]")); + QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]")); int matchCount = 0; QString surnameMatch = ""; if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; if (nl[aSurnameIndex][0].isUpper() && nl[aSurnameIndex] != "Della" && - nl[aSurnameIndex] == bnl[bSurnameIndex]) { + makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) { surnameMatch = nl[aSurnameIndex]; } + int tested = 0; foreach (QString elt, nl) { if (!elt[0].isUpper() || elt == "Della") continue; - if (bnl.contains(elt)) { + QString k = makeNameKey(elt); + if (bnl.contains(k)) { ++matchCount; - continue; + } + if (++tested == 2 && matchCount == 0) { + return false; } } - if (matchCount > 1 && surnameMatch != "") { + if (surnameMatch != "") { DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; - return true; + if (matchCount > 1) { + return true; + } else { + DEBUG << "(but not enough else matched)" << endl; + return false; + } } return false; } @@ -235,12 +262,14 @@ QSet<Composer *> matches; foreach (QString candidateName, allNames) { - QString key = candidateName.toLower(); + QString key = makeNameKey(candidateName); if (composers.contains(key)) { foreach (Composer *candidate, composers[key]) { if (candidateName == dates) { - if (!namesFuzzyMatch(c->name(), candidate) && - !namesFuzzyMatch(candidate->name(), c)) { + if (c->name() == candidate->name()) { + DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl; + } else if (!namesFuzzyMatch(c->name(), candidate) && + !namesFuzzyMatch(candidate->name(), c)) { DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; continue; } else { @@ -261,6 +290,7 @@ DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; if (!c->birth() && !c->death()) { + DEBUG << "Composer has no dates, laboriously searching for all names" << endl; // laboriously look for fuzzy match across _all_ composers for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) { @@ -277,7 +307,8 @@ if (matches.empty()) { foreach (QString candidateName, allNames) { - composers[candidateName.toLower()].insert(c); + QString key = makeNameKey(candidateName); + composers[key].insert(c); DEBUG << "added for alias or date " << candidateName << endl; } return; @@ -298,14 +329,14 @@ } else { other->addAlias(c->name()); } - composers[c->name().toLower()].insert(other); + composers[makeNameKey(c->name())].insert(other); DEBUG << "linking from alias " << c->name() << endl; foreach (QString alias, c->aliases()) { if (alias != other->name() && !other->aliases().contains(alias)) { other->addAlias(alias); - composers[alias.toLower()].insert(other); + composers[makeNameKey(alias)].insert(other); DEBUG << "linking from alias " << alias << endl; } } @@ -753,11 +784,12 @@ if (!cn) continue; if (!cn->composer()) { QString cname = cn->composerName(); + QString key = makeNameKey(cname); if (cname != "") { - if (!composers.contains(cname.toLower())) { + if (!composers.contains(key)) { DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; } else { - QSet<Composer *> cs = composers[cname.toLower()]; + QSet<Composer *> cs = composers[key]; if (cs.empty()) { DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; } else if (cs.size() > 1) {
--- a/import/ImportClassicalArchives.cpp Thu Dec 10 15:15:40 2009 +0000 +++ b/import/ImportClassicalArchives.cpp Fri Dec 11 16:10:29 2009 +0000 @@ -209,6 +209,7 @@ // names (sorry) if (!field.contains(",")) return; + field.replace(QRegExp(" +,"), ","); field = field.trimmed(); names.push_back(field);