changeset 35:732fb6b754fb

* More work guessing
author Chris Cannam
date Sat, 27 Mar 2010 12:30:25 +0000
parents 271cbaf6e8d9
children 48d8fec75afb
files common/Matcher.cpp utilities/track/track.cpp
diffstat 2 files changed, 65 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/common/Matcher.cpp	Fri Mar 26 13:53:31 2010 +0000
+++ b/common/Matcher.cpp	Sat Mar 27 12:30:25 2010 +0000
@@ -156,18 +156,28 @@
 
     if (maxResults == 0 || matches.size() < maxResults) {
 
-        // Now, for slightly lower marks, test for strings like "Symphony
-        // no 8" at the start of the title
+        // Now, for slightly lower marks, test for strings like
+        // "Symphony no 8" at the start of the title, or after a
+        // colon, slash or dash (e.g. "Brahms: Symphony no 4")
 
-        QRegExp numberRe("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)");
+        QRegExp numberRe1("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)");
+        QRegExp numberRe2("[/:-]\\s*(\\w[^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)");
+        QString tag, number;
 
-        if (numberRe.indexIn(text) >= 0) {
+        if (numberRe1.indexIn(text) >= 0) {
+            tag = numberRe1.cap(1);
+            number = numberRe1.cap(2);
+        } else if (numberRe2.indexIn(text) >= 0) {
+            tag = numberRe2.cap(1);
+            number = numberRe2.cap(2);
+        }
+        
+        if (tag != "") {
 
-            QString tag, number;
-            tag = numberRe.cap(1);
+            std::cerr << "tag = \"" << tag.toStdString() << "\", number = \""
+                      << number.toStdString() << "\"" << std::endl;
+
             tag.replace(QRegExp("[^\\w\\s]+"), "");
-            number = numberRe.cap(2);
-
             QString matcherReStr =
                 QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(tag).arg(number);
             QRegExp matcherRe(matcherReStr, Qt::CaseInsensitive);
@@ -258,11 +268,26 @@
                 int dist = ed.calculate(quoted, q, qthresh);
                 if (dist < qthresh) {
                     std::cerr << "quoted name match: " << q.toStdString() << std::endl;
-                    score += 0.7f;
+                    score += 0.7f - 0.1f * dist;
                 }
             }
         }
 
+        if (score == 0.f) {
+            int ml = std::min(name.length(), text.length());
+            int thresh = ml / 4;
+            int dist = ed.calculate(text.left(ml), name.left(ml), thresh);
+            if (dist < thresh) {
+                std::cerr << "title match: " << name.toStdString() << std::endl;
+                score += 0.8f - 0.05f * dist;
+            }
+        }
+
+        //!!! how to avoid high scores for things that we should be
+        //!!! able to recognise as different? e.g. "Chamber Symphony
+        //!!! No. 2" scoring very highly as a match for "Chamber
+        //!!! Symphony No. 1"
+        
         if (score > 0.f) {
             matches.insert(Guess(score, w));
         }
--- a/utilities/track/track.cpp	Fri Mar 26 13:53:31 2010 +0000
+++ b/utilities/track/track.cpp	Sat Mar 27 12:30:25 2010 +0000
@@ -377,15 +377,28 @@
 void guessWorkFromTitle(QString title, float scale, GuessSet &guesses)
 {
     if (title == "") return;
-    GuessSet myGuesses;
-    WorkCatalogueMatcher matcher(allWorks);
-    GuessList gl(matcher.match(title, 0));
-    if (!gl.empty()) {
-        foreach (Guess guess, gl) {
-            myGuesses.insert(Guess(guess.confidence() * scale, guess.entity()));
+    {
+        GuessSet myGuesses;
+        WorkCatalogueMatcher matcher(allWorks);
+        GuessList gl(matcher.match(title, 0));
+        if (!gl.empty()) {
+            foreach (Guess guess, gl) {
+                myGuesses.insert(Guess(guess.confidence() * scale, guess.entity()));
+            }
         }
+        integrateGuesses(guesses, myGuesses);
     }
-    integrateGuesses(guesses, myGuesses);
+    {
+        GuessSet myGuesses;
+        WorkTitleMatcher matcher(allWorks);
+        GuessList gl(matcher.match(title, 0));
+        if (!gl.empty()) {
+            foreach (Guess guess, gl) {
+                myGuesses.insert(Guess(guess.confidence() * scale, guess.entity()));
+            }
+        }
+        integrateGuesses(guesses, myGuesses);
+    }
 }    
 
 void
@@ -397,6 +410,14 @@
 }
 
 void
+guessWorkFromFilename(QString filename, float scale, GuessSet &guesses)
+{
+    cerr << "guessWorkFromFilename: " << filename << endl;
+    QString filepart = QFileInfo(filename).completeBaseName(); // drop only the final ".ext"; an unanchored regex replace also ate inner dots, e.g. "No. 5.flac" -> "No"
+    guessWorkFromTitle(filepart, scale, guesses);
+}
+
+void
 guess(QString track)
 {
     cout << endl;
@@ -469,10 +490,11 @@
     GuessSet workGuesses;
     guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses);
     guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses);
-    guessWorkFromTitleTag(tags, "TALB", 0.5, workGuesses);
+    guessWorkFromTitleTag(tags, "TALB", 0.4, workGuesses);
     guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses);
     guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses);
-    guessWorkFromTitleTag(tags, "ALBUM", 0.5, workGuesses);
+    guessWorkFromTitleTag(tags, "ALBUM", 0.4, workGuesses);
+    guessWorkFromFilename(track, 0.4, workGuesses);
     
     cerr << "Work guesses:" << endl;
     foreach (Guess g, workGuesses) {