diff import/Import.cpp @ 10:d35e5d769c87 classical-rdf

* some experiments with composer name matching
author Chris Cannam
date Wed, 17 Feb 2010 19:26:48 +0000
parents df999875c53b
children c8ef23d3888c
line wrap: on
line diff
--- a/import/Import.cpp	Fri Feb 12 16:56:29 2010 +0000
+++ b/import/Import.cpp	Wed Feb 17 19:26:48 2010 +0000
@@ -143,76 +143,6 @@
     c->addAlias(nr);
 }
 
-QString makeNameKey(QString name)
-{
-    QString key = name.toLower()
-        .replace("'", "")
-        .replace("x", "ks")
-        .replace("y", "i")
-        .replace("k", "c")
-        .replace("ch", "c")
-        .replace("cc", "c")
-        .replace("v", "f")
-        .replace("ff", "f")
-        .replace("th", "t")
-        .replace("tch", "ch")
-        .replace("er", "r");
-//    DEBUG << "makeNameKey(" << name << "): " << key << endl;
-    return key;
-}
-
-bool namesFuzzyMatch(QString an, Composer *b)
-{
-    // ew!
-
-    QString bn = b->name();
-    if (bn == an) return true;
-    if (b->aliases().contains(an)) return true;
-    int aSurnameIndex = 0, bSurnameIndex = 0;
-    if (an.contains(",")) {
-        an.replace(",", "");
-    } else {
-        aSurnameIndex = -1;
-    }
-    if (bn.contains(",")) {
-        bn.replace(",", "");
-    } else {
-        bSurnameIndex = -1;
-    }
-    QStringList nl = an.split(QRegExp("[ -]"));
-    QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]"));
-    int matchCount = 0;
-    QString surnameMatch = "";
-    if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
-    if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
-    if (nl[aSurnameIndex][0].isUpper() &&
-        nl[aSurnameIndex] != "Della" &&
-        makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
-        surnameMatch = nl[aSurnameIndex];
-    }
-    int tested = 0;
-    foreach (QString elt, nl) {
-        if (!elt[0].isUpper() || elt == "Della") continue;
-        QString k = makeNameKey(elt);
-        if (bnl.contains(k)) {
-            ++matchCount;
-        }
-        if (++tested == 2 && matchCount == 0) {
-            return false;
-        }
-    }
-    if (surnameMatch != "") {
-        DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
-        if (matchCount > 1) {
-            return true;
-        } else {
-            DEBUG << "(but not enough else matched)" << endl;
-            return false;
-        }
-    }
-    return false;
-}
-
 bool
 hasBetterName(Composer *c, Composer *other)
 {
@@ -264,21 +194,21 @@
     QSet<Composer *> matches;
 
     foreach (QString candidateName, allNames) {
-        QString key = makeNameKey(candidateName);
+        QString key = Composer::reduceName(candidateName);
         if (composers.contains(key)) {
             foreach (Composer *candidate, composers[key]) {
                 if (candidateName == dates) {
                     if (c->name() == candidate->name()) {
                         DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl;
-                    } else if (!namesFuzzyMatch(c->name(), candidate) &&
-                               !namesFuzzyMatch(candidate->name(), c)) {
+                    } else if (!candidate->matchCatalogueName(c->name()) &&
+                               !c->matchCatalogueName(candidate->name())) {
                         DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
                         continue;
                     } else {
                         DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
                     }
                 } else {
-                    if (!c->datesMatch(candidate)) {
+                    if (!c->matchDates(candidate)) {
                         DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
                         continue;
                     }
@@ -297,7 +227,7 @@
             for (ComposerMap::iterator i = composers.begin();
                  i != composers.end(); ++i) {
                 foreach (Composer *candidate, *i) {
-                    if (namesFuzzyMatch(c->name(), candidate)) {
+                    if (candidate->matchCatalogueName(c->name())) {
                         DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
                         matches.insert(candidate);
                         break;
@@ -309,7 +239,7 @@
 
         if (matches.empty()) {
             foreach (QString candidateName, allNames) {
-                QString key = makeNameKey(candidateName);
+                QString key = Composer::reduceName(candidateName);
                 composers[key].insert(c);
                 DEBUG << "added for alias or date " << candidateName << endl;
             }
@@ -331,14 +261,14 @@
     } else {
         other->addAlias(c->name());
     }
-    composers[makeNameKey(c->name())].insert(other);
+    composers[Composer::reduceName(c->name())].insert(other);
     DEBUG << "linking from alias " << c->name() << endl;
 
     foreach (QString alias, c->aliases()) {
         if (alias != other->name() && 
             !other->aliases().contains(alias)) {
             other->addAlias(alias);
-            composers[makeNameKey(alias)].insert(other);
+            composers[Composer::reduceName(alias)].insert(other);
             DEBUG << "linking from alias " << alias << endl;
         }
     }
@@ -642,7 +572,7 @@
         if (!cn) continue;
         if (!cn->composer()) {
             QString cname = cn->composerName();
-            QString key = makeNameKey(cname);
+            QString key = Composer::reduceName(cname);
             if (cname != "") {
                 if (!composers.contains(key)) {
                     DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;