changeset 33:84d6acb6b3ba

* Bit more work on track composer identification
author Chris Cannam
date Mon, 22 Mar 2010 16:41:01 +0000
parents abd5d022c85d
children 271cbaf6e8d9
files common/Matcher.cpp common/Matcher.h common/Objects.cpp common/Objects.h common/common.pro testapp/testapp.pro utilities/composer/composer.pro utilities/track/track.cpp utilities/widgettest/widgettest.pro widgets/widgets.pro
diffstat 10 files changed, 258 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/common/Matcher.cpp	Fri Mar 19 12:21:59 2010 +0000
+++ b/common/Matcher.cpp	Mon Mar 22 16:41:01 2010 +0000
@@ -9,22 +9,21 @@
 
 namespace ClassicalData {
 
-ComposerTypingQuickMatcher::ComposerTypingQuickMatcher(QList<Composer *> cl)
+ComposerTypingQuickMatcher::ComposerTypingQuickMatcher(QList<Composer *> cl) :
+    m_composers(cl)
 {
-    foreach (Composer *c, cl) {
-	m_composers[c->property("uri").value<Uri>()] = c;
-    }
 }
 
 GuessList
-ComposerTypingQuickMatcher::match(QString text, int maxResults) const
+ComposerTypingQuickMatcher::match(QString text, int maxResults,
+                                  float threshold) const
 {
     GuessList results;
 
     QMap<Guess, int> matches;
     foreach (Composer *c, m_composers) {
         float value = c->matchTypingQuick(text);
-        if (value <= 0) continue;
+        if (value < threshold) continue;
         matches.insert(Guess(value, c), 1);
     }
     
@@ -38,22 +37,49 @@
     return results;
 }
 
-ComposerTypingThoroughMatcher::ComposerTypingThoroughMatcher(QList<Composer *> cl)
+ComposerTypingThoroughMatcher::ComposerTypingThoroughMatcher(QList<Composer *> cl) :
+    m_composers(cl)
 {
-    foreach (Composer *c, cl) {
-	m_composers[c->property("uri").value<Uri>()] = c;
-    }
 }
 
 GuessList
-ComposerTypingThoroughMatcher::match(QString text, int maxResults) const
+ComposerTypingThoroughMatcher::match(QString text, int maxResults,
+                                     float threshold) const
 {
     GuessList results;
 
     QMap<Guess, int> matches;
     foreach (Composer *c, m_composers) {
         float value = c->matchTyping(text);
-        if (value <= 0) continue;
+        if (value < threshold) continue;
+        matches.insert(Guess(value, c), 1);
+    }
+    
+    int n = 0;
+    for (QMap<Guess, int>::const_iterator i = matches.begin();
+         i != matches.end(); ++i) {
+        results.push_back(i.key());
+        if (++n > maxResults) break;
+    }
+
+    return results;
+}
+
+ComposerFullTextMatcher::ComposerFullTextMatcher(QList<Composer *> cl) :
+    m_composers(cl)
+{
+}
+
+GuessList
+ComposerFullTextMatcher::match(QString text, int maxResults,
+                               float threshold) const
+{
+    GuessList results;
+
+    QMap<Guess, int> matches;
+    foreach (Composer *c, m_composers) {
+        float value = c->matchFuzzyName(text);
+        if (value < threshold) continue;
         matches.insert(Guess(value, c), 1);
     }
     
--- a/common/Matcher.h	Fri Mar 19 12:21:59 2010 +0000
+++ b/common/Matcher.h	Mon Mar 22 16:41:01 2010 +0000
@@ -39,27 +39,41 @@
 public:
     // Results are guaranteed to be returned in order from most to
     // least confident
-    virtual GuessList match(QString text, int maxResults) const = 0;
+    virtual GuessList match(QString text, int maxResults,
+                            float threshold = 0.f) const = 0;
 };
 
 class ComposerTypingQuickMatcher : public Matcher
 {
 public:
     ComposerTypingQuickMatcher(QList<Composer *> cl);
-    virtual GuessList match(QString text, int maxResults) const;
+    virtual GuessList match(QString text, int maxResults,
+                            float threshold = 0.f) const;
 
 private:
-    QHash<Dataquay::Uri, Composer *> m_composers;
+    QList<Composer *> m_composers;
 };
 
 class ComposerTypingThoroughMatcher : public Matcher
 {
 public:
     ComposerTypingThoroughMatcher(QList<Composer *> cl);
-    virtual GuessList match(QString text, int maxResults) const;
+    virtual GuessList match(QString text, int maxResults,
+                            float threshold = 0.f) const;
 
 private:
-    QHash<Dataquay::Uri, Composer *> m_composers;
+    QList<Composer *> m_composers;
+};
+
+class ComposerFullTextMatcher : public Matcher
+{
+public:
+    ComposerFullTextMatcher(QList<Composer *> cl);
+    virtual GuessList match(QString text, int maxResults,
+                            float threshold = 0.f) const;
+
+private:
+    QList<Composer *> m_composers;
 };
 
 }
--- a/common/Objects.cpp	Fri Mar 19 12:21:59 2010 +0000
+++ b/common/Objects.cpp	Mon Mar 22 16:41:01 2010 +0000
@@ -10,6 +10,9 @@
 #include "EditDistance.h"
 
 #include <QHash> // to ensure correct qHash(const QString &) is found
+#include <QFile>
+#include <QFileInfo>
+#include <QCryptographicHash>
 
 namespace ClassicalData {
 
@@ -748,6 +751,33 @@
     return rv;
 }
 
+TrackFile::TrackFile(FileSource source, QObject *parent) :
+    QObject(parent)
+{
+    if (source.isAvailable()) {
+        QFile f(source.getLocalFilename());
+        f.open(QIODevice::ReadOnly);
+        //!!! readAll() loads the whole file into memory; data may be too large!
+        QByteArray ba = f.readAll();
+        m_hash = QString::fromAscii
+            (QCryptographicHash::hash(ba, QCryptographicHash::Sha1).toHex());
+    }
+    QString location = source.getLocation();
+    if (source.isRemote()) {
+        m_uri = Dataquay::Uri(location);
+    } else {
+        if (location.contains("://")) {
+            m_uri = Dataquay::Uri(location);
+        } else if (location.startsWith('/')) {
+            m_uri = Dataquay::Uri("file://" + location);
+        } else {
+            m_uri = Dataquay::Uri("file://" + QFileInfo(location).canonicalFilePath());
+        }
+    }
+    std::cerr << "TrackFile::TrackFile: hash = " << m_hash.toStdString()
+              << ", uri = " << m_uri.toString().toStdString() << std::endl;
+}
+
 
 }
 
--- a/common/Objects.h	Fri Mar 19 12:21:59 2010 +0000
+++ b/common/Objects.h	Mon Mar 22 16:41:01 2010 +0000
@@ -18,6 +18,8 @@
 
 #include <iostream>
 
+#include "data/fileio/FileSource.h"
+
 namespace ClassicalData {
 
 //!!! need to review ownership more carefully -- and use deletion notify signals?
@@ -172,6 +174,7 @@
     Q_OBJECT
 
     Q_PROPERTY(QString name READ name WRITE setName NOTIFY nameChanged STORED true)
+    Q_PROPERTY(Dataquay::Uri uri READ uri WRITE setUri NOTIFY uriChanged STORED true)
     Q_PROPERTY(QSet<QString> aliases READ aliases WRITE setAliases NOTIFY aliasesChanged STORED true)
     Q_PROPERTY(QString remarks READ remarks WRITE setRemarks NOTIFY remarksChanged STORED true)
     Q_PROPERTY(QSet<ClassicalData::Document*> pages READ pages WRITE setPages NOTIFY pagesChanged STORED true)
@@ -179,17 +182,13 @@
 
 public:
     NamedEntity(QObject *parent = 0) : QObject(parent) { }
-    ~NamedEntity() {
-/*
-        for (QSet<Document *>::iterator i = m_pages.begin();
-             i != m_pages.end(); ++i) {
-            delete *i;
-        }
-*/
-    }
+    ~NamedEntity() { }
 
     QString name() const { return m_name; }
     virtual void setName(QString n) { m_name = n; emit nameChanged(n); }
+    
+    Dataquay::Uri uri() const { return m_uri; }
+    void setUri(Dataquay::Uri uri) { m_uri = uri; emit uriChanged(uri); }
 
     QString remarks() const { return m_remarks; }
     void setRemarks(QString n) { m_remarks = n; emit remarksChanged(n); }
@@ -201,12 +200,6 @@
     QSet<Document *> pages() const { return m_pages; }
     void addPage(Document *p) { m_pages.insert(p); emit pagesChanged(m_pages); }
     void setPages(QSet<Document *> p) {
-/*
-        for (QSet<Document *>::iterator i = m_pages.begin();
-             i != m_pages.end(); ++i) {
-            delete *i;
-        }
-*/
         m_pages = p;
         emit pagesChanged(p);
     }
@@ -216,6 +209,7 @@
     void setOtherURIs(QSet<Dataquay::Uri> u) { m_otherURIs = u; emit otherURIsChanged(u); }
 
 signals:
+    void uriChanged(Dataquay::Uri);
     void nameChanged(QString);
     void remarksChanged(QString);
     void aliasesChanged(QSet<QString>);
@@ -223,10 +217,11 @@
     void otherURIsChanged(QSet<Dataquay::Uri>);
 
 protected:
+    Dataquay::Uri m_uri;
     QString m_name;
     QString m_remarks;
     QSet<QString> m_aliases;
-    QSet<Document *> m_pages; // we own these
+    QSet<Document *> m_pages;
     QSet<Dataquay::Uri> m_otherURIs;
 };
 
@@ -555,6 +550,69 @@
     static QMutex m_mutex;
 };
 
+class TrackFile : public QObject
+{
+    Q_OBJECT
+
+    Q_PROPERTY(QString hash READ hash WRITE setHash NOTIFY hashChanged STORED true)
+    Q_PROPERTY(Dataquay::Uri uri READ uri WRITE setUri NOTIFY uriChanged STORED true)
+    Q_PROPERTY(QString ofaFingerprint READ ofaFingerprint WRITE setOfaFingerprint NOTIFY ofaFingerprintChanged STORED true)
+    Q_PROPERTY(QString puid READ puid WRITE setPuid NOTIFY puidChanged STORED true)
+    Q_PROPERTY(ClassicalData::Composer *composer READ composer WRITE setComposer NOTIFY composerChanged STORED true)
+    Q_PROPERTY(ClassicalData::Work *work READ work WRITE setWork NOTIFY workChanged STORED true)
+    Q_PROPERTY(ClassicalData::Movement *movement READ movement WRITE setMovement NOTIFY movementChanged STORED true)
+    Q_PROPERTY(QSet<Dataquay::Uri> otherURIs READ otherURIs WRITE setOtherURIs NOTIFY otherURIsChanged STORED true)
+
+public:
+    TrackFile(QObject *parent = 0) : QObject(parent) { }
+    TrackFile(FileSource file, QObject *parent = 0);
+    
+    QString hash() const { return m_hash; }
+    void setHash(QString hash) { m_hash = hash; emit hashChanged(hash); }
+
+    Dataquay::Uri uri() const { return m_uri; }
+    void setUri(Dataquay::Uri u) { m_uri = u; emit uriChanged(u); }
+
+    QString ofaFingerprint() const { return m_ofaFingerprint; }
+    void setOfaFingerprint(QString fprint) { m_ofaFingerprint = fprint; emit ofaFingerprintChanged(fprint); }
+    
+    QString puid() const { return m_puid; }
+    void setPuid(QString puid) { m_puid = puid; emit puidChanged(puid); }
+
+    Composer *composer() const { return m_composer; }
+    void setComposer(Composer *c) { m_composer = c; emit composerChanged(c); }
+
+    Work *work() const { return m_work; }
+    void setWork(Work *w) { m_work = w; emit workChanged(w); }
+
+    Movement *movement() const { return m_movement; }
+    void setMovement(Movement *m) { m_movement = m; emit movementChanged(m); }
+
+    QSet<Dataquay::Uri> otherURIs() const { return m_otherURIs; }
+    void addOtherURI(Dataquay::Uri u) { m_otherURIs.insert(u); emit otherURIsChanged(m_otherURIs); }
+    void setOtherURIs(QSet<Dataquay::Uri> u) { m_otherURIs = u; emit otherURIsChanged(u); }
+
+signals:
+    void uriChanged(Dataquay::Uri);
+    void hashChanged(QString);
+    void ofaFingerprintChanged(QString);
+    void puidChanged(QString);
+    void composerChanged(Composer *);
+    void workChanged(Work *);
+    void movementChanged(Movement *);
+    void otherURIsChanged(QSet<Dataquay::Uri>);
+
+private:
+    Dataquay::Uri m_uri;
+    QString m_hash;
+    QString m_ofaFingerprint;
+    QString m_puid;
+    Composer *m_composer;
+    Work *m_work;
+    Movement *m_movement;
+    QSet<Dataquay::Uri> m_otherURIs;
+};
+
 }
 
 Q_DECLARE_METATYPE(ClassicalData::Year);
--- a/common/common.pro	Fri Mar 19 12:21:59 2010 +0000
+++ b/common/common.pro	Mon Mar 22 16:41:01 2010 +0000
@@ -3,6 +3,9 @@
 
 load(../../all.prf)
 
+DEPENDPATH += ../../svcore
+INCLUDEPATH += ../../svcore
+
 CONFIG += staticlib
 
 HEADERS += EditDistance.h Objects.h Matcher.h TypeRegistrar.h
--- a/testapp/testapp.pro	Fri Mar 19 12:21:59 2010 +0000
+++ b/testapp/testapp.pro	Mon Mar 22 16:41:01 2010 +0000
@@ -9,7 +9,10 @@
 
 INCLUDEPATH += ../common
 
+DEPENDPATH += ../../svcore
+INCLUDEPATH += ../../svcore
+
 PRE_TARGETDEPS += ../common/libcommon.a
 
-LIBS += ../common/libcommon.a ../../../turbot/dataquay/libdataquay.a ../../../turbot/ext/libext.a
+LIBS += ../common/libcommon.a -L../../svcore -lsvcore ../../../turbot/dataquay/libdataquay.a ../../../turbot/ext/libext.a
 
--- a/utilities/composer/composer.pro	Fri Mar 19 12:21:59 2010 +0000
+++ b/utilities/composer/composer.pro	Mon Mar 22 16:41:01 2010 +0000
@@ -10,5 +10,8 @@
 
 INCLUDEPATH += ../../common
 
-LIBS += ../../common/libcommon.a ../../../../turbot/dataquay/libdataquay.a ../../../../turbot/ext/libext.a
+DEPENDPATH += ../../../svcore
+INCLUDEPATH += ../../../svcore
 
+LIBS += ../../common/libcommon.a -L../../../svcore -lsvcore ../../../../turbot/dataquay/libdataquay.a ../../../../turbot/ext/libext.a
+
--- a/utilities/track/track.cpp	Fri Mar 19 12:21:59 2010 +0000
+++ b/utilities/track/track.cpp	Mon Mar 22 16:41:01 2010 +0000
@@ -14,6 +14,9 @@
 
 #include "data/fileio/AudioFileReaderFactory.h"
 #include "data/fileio/AudioFileReader.h"
+#include "base/TempDirectory.h"
+
+#include "Matcher.h"
 
 #include <vamp-hostsdk/PluginLoader.h>
 
@@ -74,6 +77,8 @@
 }
 
 static QList<Composer *> allComposers;
+static QHash<QString, Composer *> composerAliases;
+static QHash<Uri, Composer *> composerUris;
 static QMap<Composer *, QSet<Work *> > worksMap;
 
 void
@@ -196,20 +201,22 @@
 }
 
 void
-guess(QString track)
+getTrackData(FileSource source, QString &fingerprint, QString &puid,
+             QString &title, QString &maker, AudioFileReader::TagMap &tags)
 {
-    cout << "Guessing composer for: " << track << endl;
-    AudioFileReader *reader = AudioFileReaderFactory::createReader(track);
+    AudioFileReader *reader = AudioFileReaderFactory::createReader(source);
     if (!reader || !reader->isOK()) {
         cerr << "Failed to open audio file" << endl;
         return;
     }
 
+    title = reader->getTitle();
+    maker = reader->getMaker();
     cout << "File tag title: " << reader->getTitle() << endl;
     cout << "File tag maker: " << reader->getMaker() << endl;
 
     cout << "All tags:" << endl;
-    AudioFileReader::TagMap tags = reader->getTags();
+    tags = reader->getTags();
     for (AudioFileReader::TagMap::const_iterator i = tags.begin();
          i != tags.end(); ++i) {
         cout << i->first << " " << i->second << endl;
@@ -241,17 +248,68 @@
         input[i] = &samples[i][0];
     }
     Plugin::FeatureSet features = plugin->process(input, RealTime::zeroTime);
-    std::string fingerprint;
-    std::string puid;
-    if (!features[0].empty()) fingerprint = features[0][0].label;
-    if (!features[1].empty()) puid = features[1][0].label;
+    if (!features[0].empty()) {
+        fingerprint = QString::fromStdString(features[0][0].label);
+    }
+    if (!features[1].empty()) {
+        puid = QString::fromStdString(features[1][0].label);
+    }
     features = plugin->getRemainingFeatures();
-    if (fingerprint == "" && !features[0].empty()) fingerprint = features[0][0].label;
-    if (puid == "" && !features[1].empty()) puid = features[1][0].label;
+    if (fingerprint == "" && !features[0].empty()) {
+        fingerprint = QString::fromStdString(features[0][0].label);
+    }
+    if (puid == "" && !features[1].empty()) {
+        puid = QString::fromStdString(features[1][0].label);
+    }
     std::cerr << "fingerprint = " << fingerprint << std::endl;
     std::cerr << "puid = " << puid << std::endl;
 }
 
+void
+guess(QString track)
+{
+    cout << "Guessing composer for: " << track << endl;
+
+    cerr << "Creating TrackFile object...";
+    FileSource fs(track);
+    TrackFile *tf = new TrackFile(fs);
+    cerr << "done" << endl;
+    cerr << "hash = " << tf->hash() << endl;
+
+    QString fingerprint, puid, maker, title;
+    AudioFileReader::TagMap tags;
+    //!!! bad api (results come back through five out-parameters):
+    getTrackData(fs, fingerprint, puid, title, maker, tags);
+
+    if (maker != "") {
+        cout << "Name found: " << maker << endl;
+        if (composerAliases.contains(maker)) {
+            QList<Composer *> matching = composerAliases.values(maker);
+            foreach (Composer *c, matching) {
+                cout << "Matched URI: " << c->uri() << endl;
+            }
+        } else {
+            ComposerFullTextMatcher matcher(allComposers);
+            GuessList gl(matcher.match(maker, 5, 0.5));
+            if (!gl.empty()) {
+                foreach (Guess guess, gl) {
+                    cout << "Possibly matched URI (score = " << guess.confidence() << "): " << guess.entity()->uri() << endl;
+                }
+            }
+        }
+    }
+
+    if (tags.find("MUSICBRAINZ_ARTISTID") != tags.end()) {
+        QString id = tags["MUSICBRAINZ_ARTISTID"];
+        Uri mbzUri = Uri("http://dbtune.org/musicbrainz/resource/artist/" + id);
+        cout << "MBZ id found: " << id << endl;
+        if (composerUris.contains(mbzUri)) {
+            cout << "Matched URI: " << composerUris[mbzUri]->uri() << endl;
+        }
+    }
+
+    
+}
 
 int
 main(int argc, char **argv)
@@ -288,7 +346,17 @@
     
     foreach (QObject *o, objects) {
         Composer *c = qobject_cast<Composer *>(o);
-        if (c) allComposers.push_back(c);
+        if (c) {
+            allComposers.push_back(c);
+            composerAliases.insert(c->name(), c);
+            foreach (QString alias, c->aliases()) {
+                composerAliases.insert(alias, c);
+            }
+            composerUris.insert(c->property("uri").value<Uri>(), c);
+            foreach (Uri otherUri, c->otherURIs()) {
+                composerUris.insert(otherUri, c);
+            }
+        }
     }
     
     QList<Work *> works;
@@ -313,5 +381,6 @@
     } 
         
     delete store;
+    TempDirectory::getInstance()->cleanup();
 }
 
--- a/utilities/widgettest/widgettest.pro	Fri Mar 19 12:21:59 2010 +0000
+++ b/utilities/widgettest/widgettest.pro	Mon Mar 22 16:41:01 2010 +0000
@@ -7,7 +7,10 @@
 
 INCLUDEPATH += ../../widgets ../../common
 
-LIBS += ../../widgets/libwidgets.a ../../common/libcommon.a ../../../../turbot/dataquay/libdataquay.a ../../../../turbot/ext/libext.a
+DEPENDPATH += ../../../svcore
+INCLUDEPATH += ../../../svcore
+
+LIBS += ../../widgets/libwidgets.a ../../common/libcommon.a -L../../../svcore -lsvcore ../../../../turbot/dataquay/libdataquay.a ../../../../turbot/ext/libext.a
 
 HEADERS += widgettest.h
 SOURCES += widgettest.cpp
--- a/widgets/widgets.pro	Fri Mar 19 12:21:59 2010 +0000
+++ b/widgets/widgets.pro	Mon Mar 22 16:41:01 2010 +0000
@@ -7,6 +7,9 @@
 
 INCLUDEPATH += ../common
 
+DEPENDPATH += ../../svcore
+INCLUDEPATH += ../../svcore
+
 HEADERS += TypingSelectWidget.h
 SOURCES += TypingSelectWidget.cpp