changeset 16:cb315ba61e03 classical-rdf

* Introduce a not very good typing matcher
author Chris Cannam
date Fri, 19 Feb 2010 17:40:17 +0000
parents 701702f8959a
children 06fcbfe2a6ed
files common/EditDistance.h common/Objects.cpp common/Objects.h testapp/Loader.cpp
diffstat 4 files changed, 60 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/common/EditDistance.h	Fri Feb 19 14:53:22 2010 +0000
+++ b/common/EditDistance.h	Fri Feb 19 17:40:17 2010 +0000
@@ -17,8 +17,8 @@
     };
 
     EditDistance(TranspositionMode tm = RestrictedTransposition,
-		 int editPenalty = 1,
-		 int suffixPenalty = 1,
+		 int editPenalty = 1,   //!!! probably better to lose this
+		 int suffixPenalty = 1, //!!! probably better to lose this
 		 bool normalise = true) :
 	m_transpositionMode(tm),
 	m_editPenalty(editPenalty),
--- a/common/Objects.cpp	Fri Feb 19 14:53:22 2010 +0000
+++ b/common/Objects.cpp	Fri Feb 19 17:40:17 2010 +0000
@@ -414,6 +414,8 @@
 
         QString reduced = reduceName(elt);
 
+        //!!! these don't seem to match often...
+
         if (m_reducedSurnameElements.contains(reduced)) {
             score += 10;
             haveSurname = true;
@@ -473,6 +475,47 @@
     return fscore;
 }
 
+// Estimate how likely partially typed `text` is to become this composer's
+// name: 0 means no match, larger is likelier.  Exact, prefix and substring
+// matches add fixed weights, an edit-distance match adds up to 0.6, and any
+// non-zero score then gains a bonus of m_pages.size() / 10.
+float
+Composer::matchTyping(QString text) const
+{
+    //!!! not good
+    if (text == "") return 0;
+
+    cacheNames();
+    const float fameBonus = m_pages.size() / 10.f;
+    EditDistance ed(EditDistance::RestrictedTransposition, 1, 1, true);
+
+    // Compare comma-free, lower-cased forms of both strings.
+    const QString n = name().replace(",", "").toLower();
+    text = text.replace(",", "").toLower();
+    const int nl = n.length();
+    const int tl = text.length();
+    const int threshold = nl / 4; // edit allowance; 0 when nl < 4
+
+    float score = 0.f;
+    if (n == text) score += 1.f;
+    else if (n.startsWith(text)) score += 0.8f;
+    else if (nl >= tl) {
+        if (tl > 3 && n.contains(text)) score += 0.3;
+        const int dist = ed.calculate(n.left(tl), text, threshold);
+        // A zero threshold used to make the penalty term divide by zero
+        // (NaN for dist == 0 on IEEE platforms), so skip the penalty then.
+        if (dist <= threshold) {
+            score += 0.6 - (threshold > 0 ? (0.5 * dist) / threshold : 0.0);
+        }
+    } else {
+        const int dist = ed.calculate(n, text, threshold);
+        if (dist < threshold && dist < 3) score += 0.3;
+    }
+    if (text.contains(" ") && matchFuzzyName(text) >= 1.f) score += 0.9;
+    if (score > 0.f) score += fameBonus;
+    return score;
+}
+
 static int
 compare(QString a, QString b)
 {
--- a/common/Objects.h	Fri Feb 19 14:53:22 2010 +0000
+++ b/common/Objects.h	Fri Feb 19 17:40:17 2010 +0000
@@ -355,7 +355,8 @@
      * composer name with unpredictable formatting and spelling (and
      * probably incomplete), return an estimate for the likelihood
      * that the intended composer was this one.  Higher return values
-     * indicate greater confidence.
+     * indicate greater confidence; a value of 1.0 or more indicates
+     * that all of the input is at least close to perfectly matched.
      */
     float matchFuzzyName(QString name) const;
 
@@ -364,12 +365,21 @@
      * composer name with unpredictable formatting and spelling (and
      * probably incomplete), return an estimate for the likelihood
      * that the intended composer was this one.  Higher return values
-     * indicate greater confidence.  The supplied name should have
-     * been lower-cased and split on non-alphabetical characters.
+     * indicate greater confidence; a value of 1.0 or more indicates
+     * that all of the input is at least close to perfectly matched.
+     * The supplied name should have been lower-cased and split on
+     * non-alphabetical characters.
      */
     float matchFuzzyName(QStringList name) const;
 
     /**
+     * Given a string that is in the process of being typed by the
+     * user, return an estimate of the likelihood that the text is
+     * intended to become this composer's name.
+     */
+    float matchTyping(QString text) const;
+
+    /**
      * Return the supplied name reduced into a "simplified" form,
      * eliminating many of the differences often found particularly in
      * European language names that have been anglicised.  Used in
--- a/testapp/Loader.cpp	Fri Feb 19 14:53:22 2010 +0000
+++ b/testapp/Loader.cpp	Fri Feb 19 17:40:17 2010 +0000
@@ -105,7 +105,8 @@
 	foreach (QObject *o, composers) {
 	    Composer *c = qobject_cast<Composer *>(o);
 	    if (!c) continue;
-            float value = c->matchFuzzyName(elements);
+//            float value = c->matchFuzzyName(elements);
+            float value = c->matchTyping(QString::fromStdString(s));
             matches.insert(value, c->getSortName(false));
 	}
         int n = 0;