changeset 37:a8ab8c08a668

* More drudgery on track id
author Chris Cannam
date Tue, 30 Mar 2010 18:12:35 +0100
parents 48d8fec75afb
children ac956912fcdf
files common/Matcher.cpp common/Objects.cpp common/Objects.h utilities/track/track.cpp
diffstat 4 files changed, 165 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/common/Matcher.cpp	Tue Mar 30 07:29:08 2010 +0100
+++ b/common/Matcher.cpp	Tue Mar 30 18:12:35 2010 +0100
@@ -239,6 +239,9 @@
     GuessList results;
     GuessSet matches;
 
+    // Throw away any initial numbers (likely to be track index)
+    text = text.replace(QRegExp("^[0-9]+"), "");
+
     QString quoted;
     QRegExp quoteRe("\\b[\"']([^\"]+)[\"']\\b");
     int qthresh = 0;
@@ -311,23 +314,6 @@
             }
 
             if (score > highScore) highScore = score;
-/*
-        if (score == 0.f) {
-            int ml = std::min(name.length(), text.length());
-            int thresh = ml / 4;
-            int dist = ed.calculate(text.left(ml), name.left(ml), thresh);
-            if (dist < thresh) {
-                std::cerr << "title match: " << name.toStdString() << std::endl;
-                score += 0.8f - 0.05f * dist;
-            }
-        }
-
-        if (score > 0.f) {
-
-            // need to avoid high scores for things with differing
-            // numbers, e.g. "Chamber Symphony No. 2" should not score
-            // highly as a match for "Chamber Symphony No. 1"
-            */
         }
 
         if (highScore > 0.f) {
--- a/common/Objects.cpp	Tue Mar 30 07:29:08 2010 +0100
+++ b/common/Objects.cpp	Tue Mar 30 18:12:35 2010 +0100
@@ -19,6 +19,13 @@
 QMap<QString, Form *> Form::m_map;
 QMutex Form::m_mutex;
 
+QString
+Composition::getComposerName() const // Composer name for this composition, from whichever source is set
+{
+    if (m_composer) return m_composer->name(); // composer record present: use its name
+    return m_cname; // no composer record: fall back to the stored m_cname string
+}
+
 bool
 Composer::matchDates(const Composer *b) const
 {
@@ -655,6 +662,14 @@
     emit aliasesChanged(m_aliases);
 }
 
+QString
+Work::getComposerName() const // Name of the composer returned by getComposer(), or "" when there is none
+{
+    Composer *c = getComposer();
+    if (c) return c->name();
+    else return ""; // null composer: empty string instead of dereferencing
+}
+
 static int
 compare(QString a, QString b)
 {
@@ -717,12 +732,22 @@
     // with N, because we don't want to treat e.g. "Symphony No. 8"
     // as catalogue number 8.  What a fine hack.
 
-    QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b");
-//    QRegExp catre("\\b([Oo]pu?s?|[A-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)");
+    QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)\\.?[\\s_]*(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b");
     int ix = 0;
     while ((ix = catre.indexIn(text, ix+1)) >= 0) {
         std::cerr << "extractCatalogueNumberTexts: found match \"" << catre.cap(0).toStdString() << "\"" << std::endl;
-        results.push_back(catre.cap(0));
+        QString cat = catre.cap(0);
+        // ensure space before digit
+        for (int i = 0; i+1 < cat.length(); ++i) {
+            if (!cat[i].isDigit() && !cat[i].isSpace() && cat[i+1].isDigit()) {
+                QString spaced = cat.left(i+1) + " " + cat.right(cat.length()-i-1);
+                std::cerr << "spaced out from " << cat.toStdString() << " to "
+                          << spaced.toStdString() << std::endl;
+                cat = spaced;
+                break;
+            }
+        }
+        results.push_back(cat);
     }
     return results;
 }
@@ -772,6 +797,30 @@
     return rv;
 }
 
+QString
+Work::getDisplayName() const // Build a display string from name(), catalogue(), opus() and number()
+{
+    QString suffix; // catalogue/opus part that follows the name
+
+    if (catalogue() != "") {
+        suffix = catalogue(); // a catalogue entry takes precedence over the opus
+    } else if (opus() != "") {
+        suffix = QString("Op. %1").arg(opus());
+    }
+    if (suffix != "" && number() != "") { // number is only appended when a catalogue/opus exists
+        suffix = QString("%1 no. %2").arg(suffix).arg(number());
+    }
+    if (suffix != "") {
+        if (name() != "") {
+            return QString("%1, %2").arg(name()).arg(suffix); // "<name>, <suffix>"
+        } else {
+            return suffix; // unnamed work: suffix alone
+        }
+    } else {
+        return name(); // neither catalogue nor opus: plain name (may be empty)
+    }
+}
+
 TrackFile::TrackFile(FileSource source, QObject *parent) :
     QObject(parent)
 {
--- a/common/Objects.h	Tue Mar 30 07:29:08 2010 +0100
+++ b/common/Objects.h	Tue Mar 30 18:12:35 2010 +0100
@@ -109,7 +109,7 @@
     
     Q_PROPERTY(ClassicalData::Composer *composer READ composer WRITE setComposer NOTIFY composerChanged STORED true)
     Q_PROPERTY(QSet<ClassicalData::Work *> works READ works WRITE setWorks NOTIFY worksChanged STORED true)
-    Q_PROPERTY(QString composerName READ composerName WRITE setComposerName STORED false)
+    Q_PROPERTY(QString composerName READ getComposerName WRITE setComposerName STORED false)
 
 public:
     Composition() : HistoricalEvent(), m_composer(0) { }
@@ -123,8 +123,10 @@
     void setWorks(QSet<Work *> c) { m_works = c; emit worksChanged(c); }
     void addWork(Work *w) { m_works.insert(w); emit worksChanged(m_works); }
 
-    // Not a storable property, set temporarily while composer record is found
-    QString composerName() const { return m_cname; }
+    // Not a storable property; set temporarily while composer record
+    // is found, or else looked up from composer after composer has
+    // been found
+    QString getComposerName() const;
     void setComposerName(QString n) { m_cname = n; }
 
 signals:
@@ -281,7 +283,7 @@
     const Composition *composition() const { return m_composition; }
     void setComposition(Composition *c) { m_composition = c; emit compositionChanged(c); }
 
-    Composer *composer() {
+    Composer *getComposer() const {
         if (m_composition) {
             return m_composition->composer();
         } else {
@@ -289,6 +291,8 @@
         }
     }
 
+    QString getComposerName() const;
+
     struct Ordering {
         bool operator()(Work *, Work *);
     };
@@ -310,6 +314,9 @@
      */
     static QStringList extractCatalogueNumberTexts(QString data);
 
+    /// Return name composed from title and catalogue, e.g. Symphony no. 4, Op. 42
+    QString getDisplayName() const;
+
 signals:
     void keyChanged(QString);
     void opusChanged(QString);
--- a/utilities/track/track.cpp	Tue Mar 30 07:29:08 2010 +0100
+++ b/utilities/track/track.cpp	Tue Mar 30 18:12:35 2010 +0100
@@ -79,7 +79,7 @@
 static QList<Composer *> allComposers;
 static QHash<QString, Composer *> composerAliases;
 static QHash<Uri, Composer *> composerUris;
-static QMap<Composer *, QSet<Work *> > worksMap;
+static QMap<Composer *, QList<Work *> > worksMap;
 static QList<Work *> allWorks;
 
 void
@@ -358,10 +358,11 @@
 guessFromFilename(QString filename, float scale, GuessSet &guesses)
 {
     cerr << "guessFromFilename: " << filename << endl;
-    QString dirpart = QFileInfo(filename).path().replace(QRegExp("\\d+"), "");
+    QString dirpart = QFileInfo(filename).path();
     QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts);
-    dirbits = dirbits.last().split(QRegExp("[^\\w]"),
-                                   QString::SkipEmptyParts);
+    dirbits = dirbits.last()
+        .replace(QRegExp("^\\d+"), "")
+        .split(QRegExp("[^\\w]"), QString::SkipEmptyParts);
     if (!dirbits.empty()) {
         guessFromMaker(dirbits.first(), scale, guesses);
     }
@@ -374,47 +375,80 @@
     }
 }
 
-void guessWorkFromTitle(QString title, float scale, GuessSet &guesses)
+void
+guessWorkFromTitleByCatalogue(QString title, float scale,
+                              Composer *composer, GuessSet &guesses) // Match title with a WorkCatalogueMatcher and add the scaled results to guesses
 {
     if (title == "") return;
-    {
-        GuessSet myGuesses;
-        WorkCatalogueMatcher matcher(allWorks);
-        GuessList gl(matcher.match(title, 0));
-        if (!gl.empty()) {
-            foreach (Guess guess, gl) {
-                myGuesses.insert(Guess(guess.confidence() * scale, guess.entity()));
-            }
+    WorkCatalogueMatcher matcher(composer ? worksMap.value(composer) : allWorks); // non-null composer: only that composer's worksMap entry; otherwise allWorks
+    GuessList gl(matcher.match(title, 0));
+    if (!gl.empty()) {
+        foreach (Guess guess, gl) {
+            guesses.insert(Guess(guess.confidence() * scale, guess.entity())); // confidence is multiplied by the caller's scale
         }
-        integrateGuesses(guesses, myGuesses);
     }
-    {
-        GuessSet myGuesses;
-        WorkTitleMatcher matcher(allWorks);
-        GuessList gl(matcher.match(title, 0));
-        if (!gl.empty()) {
-            foreach (Guess guess, gl) {
-                myGuesses.insert(Guess(guess.confidence() * scale, guess.entity()));
-            }
+}
+
+void
+guessWorkFromTitle(QString title, float scale,
+                   Composer *composer, GuessSet &guesses) // Match title with a WorkTitleMatcher and add the scaled results to guesses
+{
+    if (title == "") return; // nothing to match against
+    WorkTitleMatcher matcher(composer ? worksMap.value(composer) : allWorks); // non-null composer: only that composer's worksMap entry; otherwise allWorks
+    GuessList gl(matcher.match(title, 0));
+    if (!gl.empty()) {
+        foreach (Guess guess, gl) {
+            guesses.insert(Guess(guess.confidence() * scale, guess.entity())); // confidence is multiplied by the caller's scale
         }
-        integrateGuesses(guesses, myGuesses);
     }
 }    
 
 void
-guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses)
+guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale,
+                      Composer *composer, GuessSet &guesses) // If tag is present in tags, guess works from its value and merge them into guesses
 {
     if (tags.find(tag) != tags.end()) {
-        guessWorkFromTitle(tags[tag], scale, guesses);
+        GuessSet myGuesses; // scratch set so each pass is merged through integrateGuesses separately
+        guessWorkFromTitleByCatalogue(tags[tag], scale, composer, myGuesses); // pass 1: catalogue matcher (previously a duplicated title-matcher call)
+        integrateGuesses(guesses, myGuesses);
+        myGuesses.clear(); // reuse the scratch set for the second pass
+        guessWorkFromTitle(tags[tag], scale, composer, myGuesses); // pass 2: title matcher
+        integrateGuesses(guesses, myGuesses);
     }
 }
 
 void
-guessWorkFromFilename(QString filename, float scale, GuessSet &guesses)
+guessWorkFromFilenameByCatalogue(QString filename, float scale,
+                      Composer *composer, GuessSet &guesses) // Catalogue-match the last directory component and the file name of filename
 {
     cerr << "guessWorkFromFilename: " << filename << endl;
-    QString filepart = QFileInfo(filename).fileName().replace(QRegExp("\\.[^\\.]*"), "").replace(QRegExp("^\\d+[^\\w]+"), "");
-    guessWorkFromTitle(filepart, scale, guesses);
+
+    QString dirpart = QFileInfo(filename).path();
+    QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts);
+    if (!dirbits.empty()) {
+        guessWorkFromTitleByCatalogue(dirbits.last(), scale * 0.7, composer, guesses); // directory name is weighted at 0.7 of the file name's scale
+    }
+
+    QString filepart = QFileInfo(filename).fileName().replace // strip only the final extension ($-anchored), then a leading track number
+        (QRegExp("\\.[^\\.]*$"), "").replace(QRegExp("^\\d+[^\\w]+"), "");
+    guessWorkFromTitleByCatalogue(filepart, scale, composer, guesses); // unanchored pattern used to reduce "Op. 27 No. 2.mp3" to "Op"
+}
+
+void
+guessWorkFromFilenameByTitle(QString filename, float scale,
+                             Composer *composer, GuessSet &guesses) // Title-match the last directory component and the file name of filename
+{
+    cerr << "guessWorkFromFilename: " << filename << endl;
+
+    QString dirpart = QFileInfo(filename).path();
+    QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts);
+    if (!dirbits.empty()) {
+        guessWorkFromTitle(dirbits.last(), scale * 0.7, composer, guesses); // directory name is weighted at 0.7 of the file name's scale
+    }
+
+    QString filepart = QFileInfo(filename).fileName().replace // strip only the final extension ($-anchored), then a leading track number
+        (QRegExp("\\.[^\\.]*$"), "").replace(QRegExp("^\\d+[^\\w]+"), "");
+    guessWorkFromTitle(filepart, scale, composer, guesses); // unanchored pattern used to reduce "Symphony No. 5.flac" to "Symphony No"
 }
 
 void
@@ -487,14 +521,33 @@
         bc2 = bg.confidence();
     }
 
+    // If we have only one confident composer guess, consider only
+    // works from that composer (really this should permit considering
+    // works from all confident composers)
+    Composer *confidentComposer = 0;
+    if (bc2 > 0.5) {
+        confidentComposer = qobject_cast<Composer *>(guesses.begin()->entity());
+    }
+
     GuessSet workGuesses;
-    guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses);
-    guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses);
-    guessWorkFromTitleTag(tags, "TALB", 0.4, workGuesses);
-    guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses);
-    guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses);
-    guessWorkFromTitleTag(tags, "ALBUM", 0.4, workGuesses);
-    guessWorkFromFilename(track, 0.4, workGuesses);
+    if (tags["TITLE"] != "") {
+        guessWorkFromTitleTag(tags, "TITLE", 0.5, confidentComposer, workGuesses);
+    }
+    if (tags["TIT2"] != "") {
+        guessWorkFromTitleTag(tags, "TIT2", 0.5, confidentComposer, workGuesses);
+    }
+    if (workGuesses.empty()) {
+        guessWorkFromTitleTag(tags, "TIT1", 0.2, confidentComposer, workGuesses);
+        guessWorkFromTitleTag(tags, "TALB", 0.2, confidentComposer, workGuesses);
+        guessWorkFromTitleTag(tags, "TIT3", 0.1, confidentComposer, workGuesses);
+        guessWorkFromTitleTag(tags, "ALBUM", 0.4, confidentComposer, workGuesses);
+    }
+    if (workGuesses.empty() || workGuesses.begin()->confidence() < 0.3) {
+        guessWorkFromFilenameByCatalogue(track, 0.4, confidentComposer, workGuesses);
+    }
+    if (workGuesses.empty()) {
+        guessWorkFromFilenameByTitle(track, 0.3, confidentComposer, workGuesses);
+    }
     
     cerr << "Work guesses:" << endl;
     foreach (Guess g, workGuesses) {
@@ -505,8 +558,8 @@
     GuessSet consistentWorks;
     foreach (Guess wg, workGuesses) {
         Work *w = qobject_cast<Work *>(wg.entity());
-        if (!w || !w->composer()) continue;
-        Composer *wc = w->composer();
+        if (!w || !w->getComposer()) continue;
+        Composer *wc = w->getComposer();
         foreach (Guess g, guesses) {
             if (g.entity() == wc) {
                 consistentComposers.insert(g);
@@ -528,13 +581,14 @@
     float bc3 = bc2;
     QString best3 = best2;
     QString work;
-    if (!consistentComposers.empty()) {
-        Guess bg = *consistentComposers.begin();
-        best3 = bg.entity()->name();
-        bc3 = bg.confidence();
-        //!!! not ok, no guarantee that this will be the same work as
-        //!!! corresponds to the first composer in consistentComposers
-        work = consistentWorks.begin()->entity()->name();
+    if (!consistentWorks.empty()) {
+        Guess bg = *consistentWorks.begin();
+        Work *w = qobject_cast<Work *>(bg.entity());
+        if (w) {
+            bc3 = bg.confidence();
+            best3 = w->getComposerName();
+            work = w->getDisplayName();
+        }
     }
 
     cout << track << "|" << best << "|" << bc << "|" << best2 << "|" << bc2 << "|" << best3 << "|" << bc3 << "|" << work << endl;
@@ -621,7 +675,7 @@
         Composition *c = w->composition();
         if (c) {
             Composer *cp = c->composer();
-            if (cp) worksMap[cp].insert(w);
+            if (cp) worksMap[cp].push_back(w);
         }
     }