changeset 5:d23a4c935a22 classical-rdf

* Update CMN and mbz mappings for new classical archives import
author Chris Cannam
date Fri, 11 Dec 2009 16:10:29 +0000
parents 719a4f477098
children 96bf272e74c5
files import/Import.cpp import/ImportClassicalArchives.cpp
diffstat 2 files changed, 47 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/import/Import.cpp	Thu Dec 10 15:15:40 2009 +0000
+++ b/import/Import.cpp	Fri Dec 11 16:10:29 2009 +0000
@@ -141,6 +141,24 @@
     c->addAlias(nr);
 }
 
+QString makeNameKey(QString name) // Returns a lossy lower-case key so that spelling variants of a name compare equal.
+{
+    QString key = name.toLower()   // rules apply in order; each replace() is a single pass over the string
+        .replace("'", "")          // drop apostrophes
+        .replace("x", "ks")        // ends up as "cs" once the k -> c rule below has run
+        .replace("y", "i")
+        .replace("k", "c")
+        .replace("tch", "ch")      // must run before "ch" -> "c"; afterwards no "tch" is left to match
+        .replace("ch", "c")
+        .replace("cc", "c")
+        .replace("v", "f")
+        .replace("ff", "f")
+        .replace("th", "t")
+        .replace("er", "r");
+//    DEBUG << "makeNameKey(" << name << "): " << key << endl;
+    return key;
+}
+
 bool namesFuzzyMatch(QString an, Composer *b)
 {
     // ew!
@@ -160,26 +178,35 @@
         bSurnameIndex = -1;
     }
     QStringList nl = an.split(QRegExp("[ -]"));
-    QStringList bnl = bn.split(QRegExp("[ -]"));
+    QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]"));
     int matchCount = 0;
     QString surnameMatch = "";
     if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
     if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
     if (nl[aSurnameIndex][0].isUpper() &&
         nl[aSurnameIndex] != "Della" &&
-        nl[aSurnameIndex] == bnl[bSurnameIndex]) {
+        makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
         surnameMatch = nl[aSurnameIndex];
     }
+    int tested = 0;
     foreach (QString elt, nl) {
         if (!elt[0].isUpper() || elt == "Della") continue;
-        if (bnl.contains(elt)) {
+        QString k = makeNameKey(elt);
+        if (bnl.contains(k)) {
             ++matchCount;
-            continue;
+        }
+        if (++tested == 2 && matchCount == 0) {
+            return false;
         }
     }
-    if (matchCount > 1 && surnameMatch != "") {
+    if (surnameMatch != "") {
         DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
-        return true;
+        if (matchCount > 1) {
+            return true;
+        } else {
+            DEBUG << "(but not enough else matched)" << endl;
+            return false;
+        }
     }
     return false;
 }
@@ -235,12 +262,14 @@
     QSet<Composer *> matches;
 
     foreach (QString candidateName, allNames) {
-        QString key = candidateName.toLower();
+        QString key = makeNameKey(candidateName);
         if (composers.contains(key)) {
             foreach (Composer *candidate, composers[key]) {
                 if (candidateName == dates) {
-                    if (!namesFuzzyMatch(c->name(), candidate) &&
-                        !namesFuzzyMatch(candidate->name(), c)) {
+                    if (c->name() == candidate->name()) {
+                        DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl;
+                    } else if (!namesFuzzyMatch(c->name(), candidate) &&
+                               !namesFuzzyMatch(candidate->name(), c)) {
                         DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
                         continue;
                     } else {
@@ -261,6 +290,7 @@
         DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
 
         if (!c->birth() && !c->death()) {
+            DEBUG << "Composer has no dates, laboriously searching for all names" << endl;
             // laboriously look for fuzzy match across _all_ composers
             for (ComposerMap::iterator i = composers.begin();
                  i != composers.end(); ++i) {
@@ -277,7 +307,8 @@
 
         if (matches.empty()) {
             foreach (QString candidateName, allNames) {
-                composers[candidateName.toLower()].insert(c);
+                QString key = makeNameKey(candidateName);
+                composers[key].insert(c);
                 DEBUG << "added for alias or date " << candidateName << endl;
             }
             return;
@@ -298,14 +329,14 @@
     } else {
         other->addAlias(c->name());
     }
-    composers[c->name().toLower()].insert(other);
+    composers[makeNameKey(c->name())].insert(other);
     DEBUG << "linking from alias " << c->name() << endl;
 
     foreach (QString alias, c->aliases()) {
         if (alias != other->name() && 
             !other->aliases().contains(alias)) {
             other->addAlias(alias);
-            composers[alias.toLower()].insert(other);
+            composers[makeNameKey(alias)].insert(other);
             DEBUG << "linking from alias " << alias << endl;
         }
     }
@@ -753,11 +784,12 @@
         if (!cn) continue;
         if (!cn->composer()) {
             QString cname = cn->composerName();
+            QString key = makeNameKey(cname);
             if (cname != "") {
-                if (!composers.contains(cname.toLower())) {
+                if (!composers.contains(key)) {
                     DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
                 } else {
-                    QSet<Composer *> cs = composers[cname.toLower()];
+                    QSet<Composer *> cs = composers[key];
                     if (cs.empty()) {
                         DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
                     } else if (cs.size() > 1) {
--- a/import/ImportClassicalArchives.cpp	Thu Dec 10 15:15:40 2009 +0000
+++ b/import/ImportClassicalArchives.cpp	Fri Dec 11 16:10:29 2009 +0000
@@ -209,6 +209,7 @@
     // names (sorry)
     if (!field.contains(",")) return;
 
+    field.replace(QRegExp(" +,"), ",");
     field = field.trimmed();
     names.push_back(field);