changeset 19:559a001e1bf5 classical-rdf

* greatly improve matchTyping function for Composer
author Chris Cannam
date Mon, 22 Feb 2010 18:09:55 +0000
parents c8ef23d3888c
children c4cb65c436ef
files common/Objects.cpp common/Objects.h
diffstat 2 files changed, 56 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/common/Objects.cpp	Mon Feb 22 14:18:30 2010 +0000
+++ b/common/Objects.cpp	Mon Feb 22 18:09:55 2010 +0000
@@ -470,43 +470,72 @@
 }
 
 float
-Composer::matchTyping(QString text) const
+Composer::matchTyping(QString t) const // scores how well typed text t matches name(); returns 0 for empty t
 {
-    //!!! not good
-    
-    if (text == "") return 0;
+    if (t == "") return 0;
 
     cacheNames();
-    float fameBonus = m_pages.size() / 10.f;
+    float fameBonus = m_pages.size() / 40.f; // bonus proportional to the number of entries in m_pages
     
-    EditDistance ed(EditDistance::RestrictedTransposition, 1, 1, true);
+    QString n = name().replace(",", "").toLower(); // compare lower-cased, with commas removed
+    t = t.replace(",", "").toLower();
 
-    QString n = name().replace(",", "").toLower();
-    text = text.replace(",", "").toLower();
-
-    int nl = n.length();
-    int tl = text.length();
-
+    if (n == t) return 1.f + fameBonus; // exact match short-circuits everything below
+    if (n.startsWith(t)) return 0.8f + fameBonus; // typed text is a prefix of the whole name
+    
     float score = 0.f;
 
-    if (n == text) score += 1.f;
-    else if (n.startsWith(text)) score += 0.8f;
-    else if (nl >= tl) {
-        if (tl > 3 && n.contains(text)) {
-            score += 0.3;
+    static QRegExp sre("[\\., -]+"); // word separators: runs of '.', ',', space or '-'
+    QStringList nl = n.split(sre, QString::SkipEmptyParts); // words of the name
+    QStringList tl = t.split(sre, QString::SkipEmptyParts); // words of the typed text
+    if (nl.empty() || tl.empty()) return 0.f;
+    
+    int unmatched = 0; // number of typed words that matched no name word at all
+    for (int i = 0; i < tl.size(); ++i) {
+        int ni = 0;
+        for (ni = 0; ni < nl.size(); ++ni) {
+            if (tl[i] == nl[ni]) {
+                if (tl[i].length() > 1) {
+                    score += 0.2; // whole-word match on a multi-character word
+                } else {
+                    score += 0.1; // single-character word match counts for less
+                }
+            } else if (nl[ni].startsWith(tl[i])) {
+                score += 0.1; // typed word is a prefix of a name word
+            } else if (nl[ni].startsWith(tl[i][0])) {
+                score += 0.03; // only the first character agrees
+            } else {
+                continue; // no match against this name word; try the next one
+            }
+            break; // stop at the first name word that matched in any of the ways above
         }
-        int dist = ed.calculate(n.left(text.length()), text, nl/4);
-        if (dist <= nl/4) {
-            score += 0.6 - ((0.5 * dist) / (nl/4));
+        if (ni == nl.size()) { // inner loop ran to completion without a break: nothing matched
+            ++unmatched;
         }
-    } else {
-        int dist = ed.calculate(n, text, nl/4);
-        if (dist < nl/4 && dist < 3) score += 0.3;
+    }
+    if (nl[0] == tl[0]) { // extra credit when the first words agree
+        score += 0.2;
+    }
+    if (score > unmatched * 0.1) { // penalise 0.1 per unmatched typed word...
+        score -= unmatched * 0.1;
+    } else if (score > 0.1) { // ...but never push the score below 0.1 by doing so
+        score = 0.1;
     }
 
-    if (text.contains(" ") && matchFuzzyName(text) >= 1.f) score += 0.9;
+    float fuzzyScore = matchFuzzyName(t);
+    if (t.contains(" ") && fuzzyScore >= 1.f) score += 0.4; // multi-word input with a strong fuzzy-name match
+
+    if (score == 0.f) { // nothing matched word-wise: fall back to approximate matching
+        EditDistance ed(EditDistance::RestrictedTransposition, 1, 1, true);
+        if (nl.length() > tl.length()) { // NOTE(review): compares word counts, not character lengths -- confirm intended
+            int dist = calculateThresholdedDistance(ed, t, n.left(t.length()));
+            if (dist >= 0 && dist < 3) score += (3 - dist) * 0.05; // smaller distance earns more, up to 0.15
+        } else {
+            score += fuzzyScore / 10.f;
+        }
+    }
+
     if (score > 0.f) score += fameBonus; // fame bonus applies only when something matched
-
     return score;
 }
 
--- a/common/Objects.h	Mon Feb 22 14:18:30 2010 +0000
+++ b/common/Objects.h	Mon Feb 22 18:09:55 2010 +0000
@@ -3,6 +3,8 @@
 #ifndef _CLASSICAL_DATA_OBJECTS_H_
 #define _CLASSICAL_DATA_OBJECTS_H_
 
+#include <dataquay/Uri.h>
+
 #include <QObject>
 #include <QMetaType>
 #include <QString>
@@ -13,8 +15,6 @@
 #include <QMutexLocker>
 #include <QMap>
 
-#include <dataquay/Uri.h>
-
 namespace ClassicalData {
 
 class HistoricalEvent : public QObject