changeset 44:ed2befdf1e98

* Index for feature files (e.g. generated by Sonic Annotator)
author Chris Cannam
date Mon, 10 May 2010 17:33:00 +0100
parents a3f731205168
children 0033259c6772
files common/FeatureFileIndex.cpp common/FeatureFileIndex.h common/TypeRegistrar.cpp common/common.pro utilities/the-application/the-application.cpp utilities/the-application/the-application.pro
diffstat 6 files changed, 965 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/common/FeatureFileIndex.cpp	Mon May 10 17:33:00 2010 +0100
@@ -0,0 +1,134 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#include "FeatureFileIndex.h"
+#include "TypeRegistrar.h"
+
+#include <QMutexLocker>
+#include <QDir>
+
+#include "base/TempDirectory.h"
+#include "base/Exceptions.h"
+
+#include <exception>
+#include <iostream>
+
+using namespace Dataquay;
+
+
+namespace ClassicalData {
+
+/**
+ * Return the shared FeatureFileIndex.  The object is a function-local
+ * static, so it is constructed the first time this is called.
+ */
+FeatureFileIndex *
+FeatureFileIndex::getInstance()
+{
+    static FeatureFileIndex theIndex;
+    FeatureFileIndex *shared = &theIndex;
+    return shared;
+}
+
+/**
+ * Construct the index: determine the index file location, create a
+ * store with the standard prefix mappings, and import any index file
+ * already present at that location.
+ *
+ * If the index directory cannot be found or created, m_index is left
+ * null; loadFor() and updateIndex() check for that and do nothing.
+ * If an existing index file cannot be imported, the failure is
+ * reported and construction continues with whatever the store holds
+ * at that point, rather than letting the exception escape from the
+ * constructor.
+ */
+FeatureFileIndex::FeatureFileIndex() :
+    m_index(0)
+{
+    try {
+        m_indexFileName = getIndexFileName();
+    } catch (const DirectoryCreationFailed &f) {
+        std::cerr << "FeatureFileIndex: ERROR: Failed to find or create index directory: " << f.what() << std::endl;
+        return;
+    }
+
+    m_index = new BasicStore;
+
+    TypeRegistrar::addMappings(m_index, 0);
+
+    if (QFile(m_indexFileName).exists()) {
+        // A damaged or unreadable index file must not abort
+        // construction: updateIndex() re-adds entries for the feature
+        // files it finds, so carrying on is safe.
+        try {
+            m_index->import(QUrl::fromLocalFile(m_indexFileName),
+                            BasicStore::ImportIgnoreDuplicates);
+        } catch (const std::exception &e) {
+            std::cerr << "FeatureFileIndex: WARNING: Failed to import existing index from " << m_indexFileName.toStdString() << ": " << e.what() << std::endl;
+        }
+    }
+}
+
+/**
+ * Release the index store allocated by the constructor.  m_index is
+ * null if construction failed early, in which case the delete is a
+ * no-op.
+ */
+FeatureFileIndex::~FeatureFileIndex()
+{
+    delete m_index;
+    m_index = 0;
+}
+
+/**
+ * Return the path of the index file, "features.ttl" inside an "index"
+ * subdirectory of the TempDirectory containing path.  The
+ * subdirectory is created if it does not exist.
+ *
+ * Throws DirectoryCreationFailed if "index" exists but is not a
+ * directory, or if it is absent and cannot be created.
+ */
+QString
+FeatureFileIndex::getIndexFileName()
+{
+    QDir base = TempDirectory::getInstance()->getContainingPath();
+    QString leaf("index");
+    QFileInfo info(base.filePath(leaf));
+
+    if (info.exists()) {
+        // Something is already there: it has to be a directory
+        if (!info.isDir()) {
+            throw DirectoryCreationFailed(info.filePath());
+        }
+    } else if (!base.mkdir(leaf)) {
+        throw DirectoryCreationFailed(info.filePath());
+    }
+
+    QDir indexDir(info.filePath());
+    return indexDir.filePath("features.ttl");
+}
+
+/**
+ * Return the path of the "features" subdirectory of the TempDirectory
+ * containing path, creating the subdirectory if it does not exist.
+ *
+ * Throws DirectoryCreationFailed if "features" exists but is not a
+ * directory, or if it is absent and cannot be created.
+ */
+QString
+FeatureFileIndex::getFeatureDirectoryName()
+{
+    QDir base = TempDirectory::getInstance()->getContainingPath();
+    QString leaf("features");
+    QFileInfo info(base.filePath(leaf));
+
+    if (info.exists()) {
+        // Something is already there: it has to be a directory
+        if (!info.isDir()) {
+            throw DirectoryCreationFailed(info.filePath());
+        }
+    } else if (!base.mkdir(leaf)) {
+        throw DirectoryCreationFailed(info.filePath());
+    }
+
+    return info.filePath();
+}
+
+/**
+ * Bring the index up to date.  Neither argument is used yet: the
+ * part that would load features for the given track file into the
+ * given store has not been written.  Does nothing except report an
+ * error if the index store could not be set up at construction.
+ */
+void
+FeatureFileIndex::loadFor(TrackFile *tf, BasicStore *store)
+{
+    if (m_index) {
+        updateIndex();
+
+        //...
+    } else {
+        std::cerr << "FeatureFileIndex::loadFor: No index!" << std::endl;
+    }
+}
+
+/**
+ * Scan the feature directory and record, for every readable feature
+ * file that describes one or more mo:AudioFile resources, that the
+ * file is a foaf:Document whose foaf:primaryTopic is each of those
+ * resources.  The index is then saved to m_indexFileName.
+ *
+ * Holds m_mutex for the duration.  A feature file that fails to load
+ * or query is reported and skipped; it does not stop the scan.
+ */
+void
+FeatureFileIndex::updateIndex()
+{
+    QMutexLocker locker(&m_mutex);
+    if (!m_index) return;
+
+    QDir featureDir;
+    try {
+        QString s = getFeatureDirectoryName();
+        featureDir = QDir(s);
+    } catch (const DirectoryCreationFailed &f) {
+        std::cerr << "FeatureFileIndex::updateIndex: ERROR: Failed to find or create feature directory: " << f.what() << std::endl;
+        return;
+    }
+
+    featureDir.setFilter(QDir::Files);
+
+    for (unsigned int i = 0; i < featureDir.count(); ++i) {
+
+        QFileInfo fi(featureDir.filePath(featureDir[i]));
+        if (!fi.isFile() || !fi.isReadable()) continue;
+
+        QUrl fileUrl(QUrl::fromLocalFile(fi.filePath()));
+
+        // BasicStore::load hands back a newly allocated store; keep
+        // the pointer outside the try block so that it is released
+        // whether or not anything throws.
+        BasicStore *b = 0;
+        try {
+            b = BasicStore::load(fileUrl);
+            Triples ts = b->match
+                (Triple(Node(), "a", m_index->expand("mo:AudioFile")));
+            if (!ts.empty()) {
+                // The document triple is the same for every audio
+                // file in this feature file, so add it only once
+                m_index->add(Triple(Uri(fileUrl), "a", m_index->expand("foaf:Document")));
+            }
+            foreach (Triple t, ts) {
+                m_index->add(Triple(Uri(fileUrl), "foaf:primaryTopic", t.a));
+            }
+        } catch (const std::exception &e) {
+            std::cerr << "FeatureFileIndex::updateIndex: WARNING: Skipping feature file " << fi.filePath().toStdString() << ": " << e.what() << std::endl;
+        } catch (...) {
+            std::cerr << "FeatureFileIndex::updateIndex: WARNING: Skipping feature file " << fi.filePath().toStdString() << ": unknown error" << std::endl;
+        }
+        delete b;
+    }
+
+    //!!! remove triples from index that refer to nonexistent files?
+
+    std::cerr << "Saving index to " << m_indexFileName.toStdString() << std::endl;
+    m_index->save(m_indexFileName);
+}
+
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/common/FeatureFileIndex.h	Mon May 10 17:33:00 2010 +0100
@@ -0,0 +1,38 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#ifndef _CLASSICAL_DATA_FEATURE_FILE_INDEX_H_
+#define _CLASSICAL_DATA_FEATURE_FILE_INDEX_H_
+
+#include "Objects.h"
+
+#include <dataquay/BasicStore.h>
+
+#include <QMutex>
+
+namespace ClassicalData {
+
+/**
+ * Index of feature files.  Keeps an RDF store that records, for each
+ * file in the feature directory, which audio file resources that
+ * file describes, and saves that store to an index file.
+ */
+class FeatureFileIndex
+{
+public:
+    /** Return the shared instance, constructing it on first use. */
+    static FeatureFileIndex *getInstance();
+
+    FeatureFileIndex();
+    ~FeatureFileIndex();
+
+    /**
+     * Refresh the index from the feature directory.  The track file
+     * and store arguments are not used by the current implementation.
+     */
+    void loadFor(TrackFile *, Dataquay::BasicStore *);
+
+private:
+    /** Path of the index file; creates its directory if necessary. */
+    QString getIndexFileName();
+
+    /** Path of the feature directory; creates it if necessary. */
+    QString getFeatureDirectoryName();
+
+    /** Rescan the feature directory and save the index. */
+    void updateIndex();
+
+    QMutex m_mutex;                 // held for the whole of updateIndex()
+    QString m_indexFileName;        // set by the constructor
+    Dataquay::BasicStore *m_index;  // null if the index could not be set up
+};
+
+}
+
+#endif
+
+
+    
--- a/common/TypeRegistrar.cpp	Wed Apr 28 15:57:06 2010 +0100
+++ b/common/TypeRegistrar.cpp	Mon May 10 17:33:00 2010 +0100
@@ -179,6 +179,8 @@
 	mapping->addTypeUriPrefixMapping("ClassicalData::Composition", store->expand(":event/"));
 	mapping->addPropertyMapping("ClassicalData::Composition", "composer", store->expand("mo:composer"));
 	mapping->addPropertyMapping("ClassicalData::Composition", "works", store->expand("mo:produced_work"));
+
+	mapping->addTypeMapping("ClassicalData::TrackFile", store->expand("mo:AudioFile"));
     }
 }
 
--- a/common/common.pro	Wed Apr 28 15:57:06 2010 +0100
+++ b/common/common.pro	Mon May 10 17:33:00 2010 +0100
@@ -8,6 +8,6 @@
 
 CONFIG += staticlib
 
-HEADERS += EditDistance.h Objects.h Matcher.h TypeRegistrar.h
-SOURCES += EditDistance.cpp Objects.cpp Matcher.cpp TypeRegistrar.cpp
+HEADERS += EditDistance.h Objects.h Matcher.h TypeRegistrar.h FeatureFileIndex.h
+SOURCES += EditDistance.cpp Objects.cpp Matcher.cpp TypeRegistrar.cpp FeatureFileIndex.cpp
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/the-application/the-application.cpp	Mon May 10 17:33:00 2010 +0100
@@ -0,0 +1,775 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#include "Objects.h"
+#include "TypeRegistrar.h"
+#include "FeatureFileIndex.h"
+
+#include <dataquay/BasicStore.h>
+#include <dataquay/TransactionalStore.h>
+#include <dataquay/RDFException.h>
+#include <dataquay/objectmapper/ObjectLoader.h>
+#include <dataquay/objectmapper/ObjectStorer.h>
+#include <dataquay/objectmapper/ObjectMapper.h>
+#include <dataquay/objectmapper/TypeMapping.h>
+#include <dataquay/Debug.h>
+
+#include "data/fileio/AudioFileReaderFactory.h"
+#include "data/fileio/AudioFileReader.h"
+#include "base/TempDirectory.h"
+
+#include "Matcher.h"
+
+#include <vamp-hostsdk/PluginLoader.h>
+
+#include <QMultiMap>
+#include <QFileInfo>
+#include <QDir>
+#include <QCoreApplication>
+
+#include <iostream>
+
+using namespace Dataquay;
+using namespace ClassicalData;
+using namespace std;
+using namespace Vamp;
+using namespace Vamp::HostExt;
+
+/** Write a QString to a standard stream in the local 8-bit encoding. */
+ostream &operator<<(ostream &target, const QString &str)
+{
+    const QByteArray local = str.toLocal8Bit();
+    target << local.data();
+    return target;
+}
+
+/** Write a QUrl to a standard stream, enclosed in angle brackets. */
+ostream &operator<<(ostream &target, const QUrl &u)
+{
+    target << "<";
+    target << u.toString();
+    target << ">";
+    return target;
+}
+
+
+/**
+ * Import the RDF document in the named local file into the store,
+ * permitting duplicate triples.  The import is first attempted with
+ * no explicit format; if that throws, it is retried once with the
+ * format given as "ntriples".
+ *
+ * Progress and errors are written to cerr.  Returns true if either
+ * attempt succeeded, false if both failed.
+ */
+bool
+load(BasicStore *store, QString fileName)
+{
+    QUrl url = QUrl::fromLocalFile(fileName);
+
+    cerr << "Importing from URL " << url << " ...";
+    try {
+        store->import(url, BasicStore::ImportPermitDuplicates);
+    } catch (const RDFException &) {
+        // Caught by reference so the exception object is not copied
+        // or sliced.  The first failure is not reported on its own;
+        // only the retry's error is shown if that fails as well.
+        cerr << " retrying with explicit ntriples type...";
+        try {
+            store->import(url, BasicStore::ImportPermitDuplicates, "ntriples");
+        } catch (const RDFException &retryError) {
+            cerr << "failed" << endl;
+            cerr << "Import failed: " << retryError.what() << endl;
+            return false;
+        }
+    }
+
+    cerr << " done" << endl;
+    return true;
+}
+
+/**
+ * Print a usage message to cerr, naming the program by the part of
+ * the given path that follows the last '/', and exit with status -1.
+ */
+void
+usage(char *name)
+{
+    char *base = name;
+    for (char *p = name; *p; ++p) {
+        if (*p == '/') base = p + 1;
+    }
+    cerr << "Usage:" << endl;
+    cerr << "  " << base << " <input-rdf-file> guess <track.wav> [<track.wav> ...]" << endl;
+    exit(-1);
+}
+
+// File-scope lookup tables consulted by the guessing functions below.
+// The only code in this file that fills them is in main(), and that
+// code is currently commented out.
+
+// Every composer known to the program
+static QList<Composer *> allComposers;
+
+// Composer looked up by a name string (main()'s disabled code inserts
+// both the composer's name and each of its aliases)
+static QHash<QString, Composer *> composerAliases;
+
+// Composer looked up by URI (main()'s disabled code inserts both the
+// "uri" property and each of the composer's other URIs)
+static QHash<Uri, Composer *> composerUris;
+
+// Works grouped by their composer
+static QMap<Composer *, QList<Work *> > worksMap;
+
+// Every work known to the program
+static QList<Work *> allWorks;
+
+/**
+ * Print a full description of a composer to cout: URI, sort name,
+ * dates, nationality and period, gender, number of known works,
+ * aliases, remarks, associated pages and equivalent URIs.
+ */
+void
+show(Composer *c)
+{
+    cout << c->property("uri").value<Uri>() << endl;
+    cout << c->getSortName(true);
+    QString d = c->getDisplayDates();
+    if (d != "") cout << " (" << d << ")";
+    if (!c->nationality().empty() || c->period() != "") {
+        cout << " [";
+        bool first = true;
+        foreach (QString n, c->nationality()) {
+            if (!first) cout << "/";
+            cout << n;
+            first = false;
+        }
+        if (c->period() != "") {
+            if (!first) cout << ", ";
+            cout << c->period();
+        }
+        cout << "]";
+    }
+    if (c->gender() != "") {
+        cout << " *" << c->gender();
+    }
+    // Look the works up with value() rather than operator[]: on a
+    // non-const QMap, operator[] inserts an empty entry for a missing
+    // key, and printing a composer should not modify worksMap.
+    const QList<Work *> works = worksMap.value(c);
+    if (!works.empty()) {
+        cout << " [" << works.size() << " work(s)]";
+    }
+    cout << endl;
+    foreach (QString a, c->aliases()) {
+        cout << " - " << a << endl;
+    }
+    if (c->remarks() != "") {
+        cout << " " << c->remarks() << endl;
+    }
+    foreach (Document *page, c->pages()) {
+        cout << page->siteName() << " -> " << page->uri() << endl;
+    }
+    foreach (Uri u, c->otherURIs()) {
+        cout << "Same as " << u << endl;
+    }
+}
+
+/**
+ * Print a short description of a composer to cout: URI, sort name,
+ * dates, nationality and period, gender, and number of known works.
+ */
+void
+showBrief(Composer *c)
+{
+    cout << c->property("uri").value<Uri>() << endl;
+    cout << c->getSortName(false);
+    QString d = c->getDisplayDates();
+    if (d != "") cout << " (" << d << ")";
+    if (!c->nationality().empty() || c->period() != "") {
+        cout << " [";
+        bool first = true;
+        foreach (QString n, c->nationality()) {
+            if (!first) cout << "/";
+            cout << n;
+            first = false;
+        }
+        if (c->period() != "") {
+            if (!first) cout << " ";
+            cout << c->period();
+        }
+        cout << "]";
+    }
+    if (c->gender() != "") {
+        cout << " *" << c->gender();
+    }
+    // Look the works up with value() rather than operator[]: on a
+    // non-const QMap, operator[] inserts an empty entry for a missing
+    // key, and printing a composer should not modify worksMap.
+    const QList<Work *> works = worksMap.value(c);
+    if (!works.empty()) {
+        cout << " [" << works.size() << " work(s)]";
+    }
+    cout << endl;
+}
+
+/**
+ * Print a brief description of each of the given composers, ordered
+ * by sort name.
+ */
+void
+listBrief(QList<Composer *> composers)
+{
+    typedef QMultiMap<QString, Composer *> ByName;
+
+    ByName ordered;
+    for (int i = 0; i < composers.size(); ++i) {
+        Composer *c = composers.at(i);
+        ordered.insert(c->getSortName(false), c);
+    }
+
+    for (ByName::const_iterator it = ordered.constBegin();
+         it != ordered.constEnd(); ++it) {
+        showBrief(it.value());
+    }
+}
+
+/**
+ * Print the URI of each of the given composers, one per line, in
+ * ascending URI order.
+ */
+void
+listUris(QList<Composer *> composers)
+{
+    typedef QMultiMap<Uri, Composer *> ByUri;
+
+    ByUri ordered;
+    for (int i = 0; i < composers.size(); ++i) {
+        Composer *c = composers.at(i);
+        ordered.insert(c->property("uri").value<Uri>(), c);
+    }
+
+    for (ByUri::const_iterator it = ordered.constBegin();
+         it != ordered.constEnd(); ++it) {
+        cout << it.key() << endl;
+    }
+}
+
+/**
+ * Print search matches from highest score to lowest, skipping any
+ * whose score is not positive.  The first match printed is shown in
+ * full and the rest briefly.  Printing stops once more than count
+ * matches have been shown.  Prints "No matches" if nothing was shown.
+ */
+void
+showSearchResults(QMultiMap<float, Composer *> matches, int count)
+{
+    typedef QMultiMap<float, Composer *>::const_iterator MatchIter;
+
+    int shown = 0;
+    MatchIter it = matches.end();
+
+    // The map is ordered by ascending score, so walk it backwards
+    while (it != matches.begin()) {
+        --it;
+        if (it.key() <= 0) continue;
+
+        cout << endl;
+        if (shown == 0) {
+            cout << "Best match:" << endl;
+        } else if (shown == 1) {
+            cout << "Other candidate(s):" << endl;
+        }
+
+        cout << "[" << it.key() << "] ";
+        if (shown == 0) {
+            show(it.value());
+        } else {
+            showBrief(it.value());
+        }
+
+        ++shown;
+        if (shown > count) break;
+    }
+
+    if (shown == 0) cout << "No matches" << endl;
+    cout << endl;
+}
+
+/**
+ * Read metadata from an audio file and compute its OFA fingerprint
+ * and PUID using the ofa-vamp-plugin Vamp plugin.
+ *
+ * On return, title, maker and tags hold whatever the file reader
+ * reported (they are set before the plugin is loaded, so they are
+ * filled in even if fingerprinting fails).  fingerprint and puid are
+ * set only if the plugin produced them; otherwise they are left
+ * untouched.  All tags are also listed on cout.
+ *
+ * Failures (unreadable file, missing plugin, no audio data, plugin
+ * initialisation failure) are reported on cerr and the function
+ * returns early; the reader and plugin are released on every path.
+ */
+void
+getTrackData(FileSource source, QString &fingerprint, QString &puid,
+             QString &title, QString &maker, AudioFileReader::TagMap &tags)
+{
+    AudioFileReader *reader = AudioFileReaderFactory::createReader(source);
+//    AudioFileReader *reader = AudioFileReaderFactory::createThreadingReader(source);
+    if (!reader || !reader->isOK()) {
+        cerr << "Failed to open audio file" << endl;
+        delete reader; // may be non-null but not OK
+        return;
+    }
+
+    title = reader->getTitle();
+    maker = reader->getMaker();
+
+    cout << "All tags:" << endl;
+    tags = reader->getTags();
+    for (AudioFileReader::TagMap::const_iterator i = tags.begin();
+         i != tags.end(); ++i) {
+        cout << i->first << " " << i->second << endl;
+    }
+
+    PluginLoader *pl = PluginLoader::getInstance();
+    Plugin *plugin = pl->loadPlugin
+        ("ofa-vamp-plugin:ofa_puid_and_fingerprint", reader->getSampleRate(), PluginLoader::ADAPT_ALL);
+    if (!plugin) {
+        cerr << "Failed to load OFA Vamp plugin" << endl;
+        delete reader;
+        return;
+    }
+
+    // 135 seconds... well, ok, let's have 136
+    int secs = 136;
+    
+    int want = int(secs * reader->getSampleRate());
+    int ch = reader->getChannelCount();
+    std::vector<SampleBlock> samples;
+    reader->getDeInterleavedFrames(0, want, samples);
+
+    // Every channel is indexed at [0] below, so make sure there is
+    // at least one sample in each channel we are about to use
+    bool usable = (ch > 0 && int(samples.size()) >= ch);
+    for (int i = 0; usable && i < ch; ++i) {
+        if (samples[i].empty()) usable = false;
+    }
+    if (!usable) {
+        cerr << "Failed to read any audio data from file" << endl;
+        delete plugin;
+        delete reader;
+        return;
+    }
+
+    // The whole excerpt is handed to the plugin as a single block
+    int have = samples[0].size();
+    if (!plugin->initialise(ch, have, have)) {
+        cerr << "Failed to initialise(" << ch << "," << have << "," << have << ") plugin" << endl;
+        delete plugin;
+        delete reader;
+        return;
+    }
+    
+    std::vector<float *> input(ch);
+    for (int i = 0; i < ch; ++i) {
+        input[i] = &samples[i][0];
+    }
+    Plugin::FeatureSet features = plugin->process(&input[0], RealTime::zeroTime);
+    if (!features[0].empty()) {
+        fingerprint = QString::fromStdString(features[0][0].label);
+    }
+    if (!features[1].empty()) {
+        puid = QString::fromStdString(features[1][0].label);
+    }
+
+    // Results may only become available once processing is complete
+    features = plugin->getRemainingFeatures();
+    if (fingerprint == "" && !features[0].empty()) {
+        fingerprint = QString::fromStdString(features[0][0].label);
+    }
+    if (puid == "" && !features[1].empty()) {
+        puid = QString::fromStdString(features[1][0].label);
+    }
+
+    delete plugin;
+    delete reader;
+}
+
+/**
+ * Return a multiplier slightly above 1 for composers we have works
+ * for: 1 plus 0.01 for each decimal digit in the number of works
+ * recorded for the composer in worksMap.  Returns exactly 1 for
+ * anything that is not a composer or has no entry in worksMap.
+ */
+float
+bonusFactor(NamedEntity *e)
+{
+    float factor = 1.f;
+
+    Composer *composer = qobject_cast<Composer *>(e);
+    if (!composer || !worksMap.contains(composer)) {
+        return factor;
+    }
+
+    // tiny nudge to prefer composers we actually have works for
+    for (int remaining = worksMap[composer].size();
+         remaining > 0;
+         remaining = remaining / 10) {
+        factor += 0.01;
+    }
+
+    return factor;
+}
+
+/**
+ * Merge newGuesses into guesses, combining confidences per entity.
+ *
+ * Each existing guess contributes its confidence multiplied by
+ * bonusFactor() for its entity.  A new guess for an entity already
+ * present adds half its confidence; a new guess for an entity not
+ * yet present contributes its confidence unchanged.  guesses is then
+ * replaced with one guess per entity carrying the combined value.
+ */
+void
+integrateGuesses(GuessSet &guesses, GuessSet newGuesses)
+{
+    typedef QHash<NamedEntity *, float> ConfidenceMap;
+    ConfidenceMap combined;
+
+    foreach (Guess existing, guesses) {
+        NamedEntity *who = existing.entity();
+        combined[who] += existing.confidence() * bonusFactor(who);
+    }
+
+    foreach (Guess incoming, newGuesses) {
+        NamedEntity *who = incoming.entity();
+        ConfidenceMap::iterator found = combined.find(who);
+        if (found == combined.end()) {
+            combined.insert(who, incoming.confidence());
+        } else {
+            found.value() += incoming.confidence() / 2;
+        }
+    }
+
+    guesses.clear();
+    for (ConfidenceMap::const_iterator it = combined.constBegin();
+         it != combined.constEnd(); ++it) {
+        guesses.insert(Guess(it.value(), it.key()));
+    }
+}
+
+/**
+ * Guess composers from a maker (artist/composer) string and merge
+ * the results into guesses.
+ *
+ * If the string is a key in composerAliases, each composer recorded
+ * for it is guessed with confidence 10 * scale.  Otherwise the string
+ * is matched against all composers with ComposerFullTextMatcher and
+ * each result is guessed with its confidence multiplied by scale.
+ * Does nothing for an empty string.
+ */
+void
+guessFromMaker(QString maker, float scale, GuessSet &guesses)
+{
+    if (maker == "") return;
+//    cerr << "guessFromMaker: " << maker << endl;
+
+    GuessSet found;
+
+    if (!composerAliases.contains(maker)) {
+        ComposerFullTextMatcher matcher(allComposers);
+        GuessList candidates(matcher.match(maker, 5, 0.5));
+        foreach (Guess candidate, candidates) {
+            found.insert(Guess(candidate.confidence() * scale,
+                               candidate.entity()));
+        }
+    } else {
+        QList<Composer *> exact = composerAliases.values(maker);
+        foreach (Composer *c, exact) {
+            found.insert(Guess(10 * scale, c));
+        }
+    }
+
+    integrateGuesses(guesses, found);
+}
+
+/**
+ * If the tag map contains the named tag, guess composers from its
+ * value as a maker string (see guessFromMaker).
+ */
+void
+guessFromMakerTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses)
+{
+    AudioFileReader::TagMap::iterator hit = tags.find(tag);
+    if (hit == tags.end()) return;
+    guessFromMaker(hit->second, scale, guesses);
+}
+
+/**
+ * Guess composers from a title string.  The title is split at the
+ * characters ':', ',', '_' and '-'; if that yields more than one
+ * non-empty part, the first part is tried as a maker string.
+ */
+void
+guessFromTitle(QString title, float scale, GuessSet &guesses)
+{
+    QRegExp separators("[:,_-]");
+    QStringList parts = title.split(separators, QString::SkipEmptyParts);
+    if (parts.size() <= 1) return;
+    guessFromMaker(parts.first(), scale, guesses);
+}    
+
+/**
+ * If the tag map contains the named tag, guess composers from its
+ * value as a title string (see guessFromTitle).
+ */
+void
+guessFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses)
+{
+    AudioFileReader::TagMap::iterator hit = tags.find(tag);
+    if (hit == tags.end()) return;
+    guessFromTitle(hit->second, scale, guesses);
+}
+
+/**
+ * Guess composers from a file path.
+ *
+ * Two candidates are tried as maker strings: the first word of the
+ * innermost directory name (after stripping any leading digits), and
+ * the first word of the file name (after stripping all digits).
+ * Words are runs of \w characters.
+ */
+void
+guessFromFilename(QString filename, float scale, GuessSet &guesses)
+{
+    cerr << "guessFromFilename: " << filename << endl;
+    QString dirpart = QFileInfo(filename).path();
+    QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts);
+    // A file directly under "/" has a directory part that splits to
+    // an empty list, and last() must not be called on an empty list
+    if (!dirbits.empty()) {
+        QStringList dirwords = dirbits.last()
+            .replace(QRegExp("^\\d+"), "")
+            .split(QRegExp("[^\\w]"), QString::SkipEmptyParts);
+        if (!dirwords.empty()) {
+            guessFromMaker(dirwords.first(), scale, guesses);
+        }
+    }
+
+    QString filepart = QFileInfo(filename).fileName().replace(QRegExp("\\d+"), "");
+    QStringList filebits = filepart.split(QRegExp("[^\\w]"),
+                                          QString::SkipEmptyParts);
+    if (!filebits.empty()) {
+        guessFromMaker(filebits.first(), scale, guesses);
+    }
+}
+
+/**
+ * Guess works from a title string using WorkCatalogueMatcher, adding
+ * each result to guesses with its confidence multiplied by scale.
+ * Only the given composer's works are considered if composer is
+ * non-null; otherwise all works are.  Does nothing for an empty
+ * title.
+ */
+void
+guessWorkFromTitleByCatalogue(QString title, float scale,
+                              Composer *composer, GuessSet &guesses)
+{
+    if (title == "") return;
+
+    QList<Work *> candidates;
+    if (composer) {
+        candidates = worksMap.value(composer);
+    } else {
+        candidates = allWorks;
+    }
+
+    WorkCatalogueMatcher matcher(candidates);
+    GuessList results(matcher.match(title, 0));
+    foreach (Guess result, results) {
+        guesses.insert(Guess(result.confidence() * scale, result.entity()));
+    }
+}
+
+/**
+ * Guess works from a title string using WorkTitleMatcher, adding
+ * each result to guesses with its confidence multiplied by scale.
+ * Only the given composer's works are considered if composer is
+ * non-null; otherwise all works are.  Does nothing for an empty
+ * title.
+ */
+void
+guessWorkFromTitle(QString title, float scale,
+                   Composer *composer, GuessSet &guesses)
+{
+    if (title == "") return;
+
+    QList<Work *> candidates;
+    if (composer) {
+        candidates = worksMap.value(composer);
+    } else {
+        candidates = allWorks;
+    }
+
+    WorkTitleMatcher matcher(candidates);
+    GuessList results(matcher.match(title, 0));
+    foreach (Guess result, results) {
+        guesses.insert(Guess(result.confidence() * scale, result.entity()));
+    }
+}    
+
+/**
+ * If the tag map contains the named tag, guess works from its value
+ * and merge the results into guesses.  The value is matched twice,
+ * once by catalogue number and once by title text, and each set of
+ * results is merged separately with integrateGuesses.
+ */
+void
+guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale,
+                      Composer *composer, GuessSet &guesses)
+{
+    cerr << "guessWorkFromTitleTag: " << tag << endl;
+
+    AudioFileReader::TagMap::iterator hit = tags.find(tag);
+    if (hit == tags.end()) return;
+
+    cerr << "guessWorkFromTitleTag: tag is " << hit->second << endl;
+
+    // This used to run the title matcher twice with identical
+    // arguments, which merely re-counted the same results; the
+    // catalogue matcher was never applied to tag values at all.
+    GuessSet myGuesses;
+    guessWorkFromTitleByCatalogue(hit->second, scale, composer, myGuesses);
+    integrateGuesses(guesses, myGuesses);
+
+    myGuesses.clear();
+    guessWorkFromTitle(hit->second, scale, composer, myGuesses);
+    integrateGuesses(guesses, myGuesses);
+}
+
+/**
+ * Guess works by catalogue number from a file path.  The innermost
+ * directory name is tried at 0.7 times the given scale, then the
+ * file name (with everything from each '.' up to the next '.'
+ * removed, and a leading number plus following non-word characters
+ * stripped) is tried at the full scale.
+ */
+void
+guessWorkFromFilenameByCatalogue(QString filename, float scale,
+                      Composer *composer, GuessSet &guesses)
+{
+    cerr << "guessWorkFromFilename: " << filename << endl;
+
+    QStringList dirbits =
+        QFileInfo(filename).path().split("/", QString::SkipEmptyParts);
+    if (!dirbits.empty()) {
+        guessWorkFromTitleByCatalogue(dirbits.last(), scale * 0.7, composer, guesses);
+    }
+
+    QString stem = QFileInfo(filename).fileName();
+    stem.replace(QRegExp("\\.[^\\.]*"), "");
+    stem.replace(QRegExp("^\\d+[^\\w]+"), "");
+    guessWorkFromTitleByCatalogue(stem, scale, composer, guesses);
+}
+
+/**
+ * Guess works by title text from a file path.  The innermost
+ * directory name is tried at 0.7 times the given scale, then the
+ * file name (with everything from each '.' up to the next '.'
+ * removed, and a leading number plus following non-word characters
+ * stripped) is tried at the full scale.
+ */
+void
+guessWorkFromFilenameByTitle(QString filename, float scale,
+                             Composer *composer, GuessSet &guesses)
+{
+    cerr << "guessWorkFromFilename: " << filename << endl;
+
+    QStringList dirbits =
+        QFileInfo(filename).path().split("/", QString::SkipEmptyParts);
+    if (!dirbits.empty()) {
+        guessWorkFromTitle(dirbits.last(), scale * 0.7, composer, guesses);
+    }
+
+    QString stem = QFileInfo(filename).fileName();
+    stem.replace(QRegExp("\\.[^\\.]*"), "");
+    stem.replace(QRegExp("^\\d+[^\\w]+"), "");
+    guessWorkFromTitle(stem, scale, composer, guesses);
+}
+
+/**
+ * Guess the composer and work for an audio track and return a newly
+ * allocated TrackFile describing the result.
+ *
+ * The track's tags, fingerprint and PUID are read with getTrackData.
+ * Composer guesses are gathered from composer tags, then (if those
+ * are absent or weak) from artist, performer and title tags, from a
+ * MusicBrainz artist id tag, and finally from the file path.  Work
+ * guesses are gathered from title tags and, if necessary, the file
+ * path.  Work guesses whose composer is also among the composer
+ * guesses are reported as "consistent".
+ *
+ * Diagnostics are written to cerr, and a single '|'-separated summary
+ * line to cout.  Throughout, the first element of a GuessSet is
+ * treated as the best guess.
+ */
+TrackFile *
+guess(QString track)
+{
+    cout << endl;
+    cout << "Guessing composer for: " << track << endl;
+
+//    cerr << "Creating TrackFile object...";
+    FileSource fs(track);
+    TrackFile *tf = new TrackFile(fs);
+//    cerr << "done" << endl;
+//    cerr << "hash = " << tf->hash() << endl;
+
+    QString fingerprint, puid, maker, title;
+    AudioFileReader::TagMap tags;
+    //!!! bad api!:
+    getTrackData(fs, fingerprint, puid, title, maker, tags);
+
+    cout << "fingerprint: " << fingerprint.toStdString() << ", puid: "
+         << puid.toStdString() << endl;
+
+    GuessSet guesses;
+
+    // Explicit composer tags are tried first, at full weight
+    guessFromMakerTag(tags, "TCOM", 1.0, guesses);
+    guessFromMakerTag(tags, "COMPOSER", 1.0, guesses);
+
+    // Fall back on artist/performer and title tags only if the
+    // composer tags gave nothing, or nothing convincing
+    if (guesses.empty() || guesses.begin()->confidence() < 0.4) {
+        guessFromMakerTag(tags, "TOPE", 0.8, guesses);
+        guessFromMakerTag(tags, "TPE1", 0.8, guesses);
+
+        guessFromMakerTag(tags, "ARTIST", 0.9, guesses);
+        guessFromMakerTag(tags, "PERFORMER", 0.8, guesses);
+
+        guessFromTitleTag(tags, "TIT1", 0.4, guesses);
+        guessFromTitleTag(tags, "TIT2", 0.5, guesses);
+        guessFromTitleTag(tags, "TALB", 0.5, guesses);
+        guessFromTitleTag(tags, "TIT3", 0.3, guesses);
+
+        guessFromTitleTag(tags, "TITLE", 0.5, guesses);
+        guessFromTitleTag(tags, "ALBUM", 0.5, guesses);
+    }
+
+    // A MusicBrainz artist id that maps to a known composer URI is
+    // added directly, with confidence 2.0
+    if (tags.find("MUSICBRAINZ_ARTISTID") != tags.end()) {
+        QString id = tags["MUSICBRAINZ_ARTISTID"];
+        Uri mbzUri = Uri("http://dbtune.org/musicbrainz/resource/artist/" + id);
+        cout << "MBZ id found: " << id << endl;
+        if (composerUris.contains(mbzUri)) {
+            guesses.insert(Guess(2.0, composerUris[mbzUri]));
+        }
+    }
+
+    cerr << "Composer guesses:" << endl;
+    foreach (Guess g, guesses) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    // best/bc: best composer guess from tags alone
+    float bc = 0.f;
+    QString best;
+    if (!guesses.empty()) {
+        Guess bg = *guesses.begin();
+        best = bg.entity()->name();
+        bc = bg.confidence();
+    }
+
+    guessFromFilename(track, 0.5, guesses);
+
+    // best2/bc2: best composer guess once the file path has been
+    // taken into account as well
+    float bc2 = 0.f;
+    QString best2;
+    if (!guesses.empty()) {
+        Guess bg = *guesses.begin();
+        best2 = bg.entity()->name();
+        bc2 = bg.confidence();
+    }
+
+    // If we have only one confident composer guess, consider only
+    // works from that composer (really this should permit considering
+    // works from all confident composers)
+    Composer *confidentComposer = 0;
+    if (bc2 > 0.5) {
+        confidentComposer = qobject_cast<Composer *>(guesses.begin()->entity());
+    }
+
+    QString bestTitle;
+
+    // Work guesses: main title tags first; then secondary title and
+    // album tags if those gave nothing; then the file path by
+    // catalogue number if still nothing convincing; then the file
+    // path by title text if still nothing at all.  Note that
+    // tags["..."] adds an empty entry to this local tag map when the
+    // tag is absent.
+    GuessSet workGuesses;
+    if (tags["TIT2"] != "") {
+        bestTitle = tags["TIT2"];
+        guessWorkFromTitleTag(tags, "TIT2", 0.5, confidentComposer, workGuesses);
+    }
+    if (tags["TITLE"] != "") {
+        bestTitle = tags["TITLE"];
+        guessWorkFromTitleTag(tags, "TITLE", 0.5, confidentComposer, workGuesses);
+    }
+    if (workGuesses.empty()) {
+        guessWorkFromTitleTag(tags, "TIT1", 0.2, confidentComposer, workGuesses);
+        guessWorkFromTitleTag(tags, "TALB", 0.2, confidentComposer, workGuesses);
+        guessWorkFromTitleTag(tags, "TIT3", 0.1, confidentComposer, workGuesses);
+        guessWorkFromTitleTag(tags, "ALBUM", 0.4, confidentComposer, workGuesses);
+    }
+    if (workGuesses.empty() || workGuesses.begin()->confidence() < 0.3) {
+        guessWorkFromFilenameByCatalogue(track, 0.4, confidentComposer, workGuesses);
+    }
+    if (workGuesses.empty()) {
+        guessWorkFromFilenameByTitle(track, 0.3, confidentComposer, workGuesses);
+    }
+    
+    cerr << "Work guesses:" << endl;
+    foreach (Guess g, workGuesses) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    // Keep the work guesses whose composer is also one of the
+    // composer guesses, together with those composer guesses
+    GuessSet consistentComposers;
+    GuessSet consistentWorks;
+    foreach (Guess wg, workGuesses) {
+        Work *w = qobject_cast<Work *>(wg.entity());
+        if (!w || !w->getComposer()) continue;
+        Composer *wc = w->getComposer();
+        foreach (Guess g, guesses) {
+            if (g.entity() == wc) {
+                consistentComposers.insert(g);
+                consistentWorks.insert(wg);
+            }
+        }
+    }
+
+    cerr << "Consistent composer guesses:" << endl;
+    foreach (Guess g, consistentComposers) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    cerr << "Consistent work guesses:" << endl;
+    foreach (Guess g, consistentWorks) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    // best3/bc3: composer name and confidence taken from the best
+    // consistent work guess if there is one, otherwise the same as
+    // best2/bc2
+    float bc3 = bc2;
+    QString best3 = best2;
+    QString work;
+    Work *bestWork = 0;
+    if (!consistentWorks.empty()) {
+        Guess bg = *consistentWorks.begin();
+        bestWork = qobject_cast<Work *>(bg.entity());
+        if (bestWork) {
+            bc3 = bg.confidence();
+            best3 = bestWork->getComposerName();
+            work = bestWork->getDisplayName();
+        }
+    }
+
+    // Summary line: track, then the three composer guesses with their
+    // confidences, then the work name and the title tag used
+    cout << track << "|" << best << "|" << bc << "|" << best2 << "|" << bc2 << "|" << best3 << "|" << bc3 << "|" << work << "|" << bestTitle << endl;
+
+    tf->setOfaFingerprint(fingerprint);
+    tf->setPuid(puid);
+    tf->setComposer(confidentComposer);
+    tf->setWork(bestWork);
+    return tf;
+}
+
+
+
+/**
+ * Program entry point.
+ *
+ * As it stands this only makes sure that $HOME/.classical-rdf/features
+ * exists (returning 2 if it cannot be created), sets the application
+ * name, asks the shared FeatureFileIndex to refresh itself, and then
+ * cleans up the temporary directory.  The command-line arguments are
+ * not used.
+ *
+ * Two earlier bodies are retained below as commented-out code: an
+ * inline version of the feature file indexing, and the original
+ * "guess" command, which loads composer and work data from an RDF
+ * file and runs guess() on each track named on the command line.
+ */
+int
+main(int argc, char **argv)
+{
+    //!!! N.B. On Windows this will take the profile path over the
+    //!!! home drive path -- we don't want that
+    QString featuresRelPath(".classical-rdf/features");
+    if (!QDir(QDir::home().filePath(featuresRelPath)).exists() &&
+        !QDir::home().mkpath(featuresRelPath)) {
+        std::cerr << "Features directory $HOME/" << featuresRelPath.toStdString()
+                  << " does not exist and creation failed" << std::endl;
+        return 2;
+    }
+
+    QCoreApplication::setApplicationName("classical-rdf");
+
+    FeatureFileIndex *ffi = FeatureFileIndex::getInstance();
+
+    // Both arguments are null: loadFor does not use them at present
+    ffi->loadFor(0, 0);
+    
+/*
+    BasicStore *index = new BasicStore;
+
+    QDir features(QDir::home().filePath(featuresRelPath));
+    features.setFilter(QDir::Files);
+    for (unsigned int i = 0; i < features.count(); ++i) {
+        QFileInfo fi(features.filePath(features[i]));
+        if (fi.isFile() && fi.isReadable()) {
+            std::cout << fi.fileName().toStdString() << std::endl;
+            try {
+                QUrl fileUrl(QUrl::fromLocalFile(fi.filePath()));
+                BasicStore *b = BasicStore::load(fileUrl);
+                Triples ts = b->match
+                    (Triple(Node(), "a",
+                            Uri("http://purl.org/ontology/mo/AudioFile")));
+                foreach (Triple t, ts) {
+                    std::cout << "Audio file: <" << t.a.value.toStdString() << ">" << std::endl;
+                    index->add(Triple(Uri(fileUrl), "a",
+                                      Uri("http://xmlns.com/foaf/0.1/Document")));
+                    index->add(Triple(Uri(fileUrl), 
+                                      Uri("http://xmlns.com/foaf/0.1/primaryTopic"),
+                                      t.a));
+                }
+            } catch (...) { }
+        }
+    }
+
+    
+
+    index->save("index.ttl");
+*/
+/*
+    if (argc < 3) usage(argv[0]);
+    QString inRDFFileName = argv[1];
+    QString command = argv[2];
+    QStringList args;
+    for (int i = 3; i < argc; ++i) {
+        args.push_back(argv[i]);
+    }
+
+    //!!! unit test!
+    int c = Work::compareCatalogueNumberTexts("Op. 1 no 4", "Op. 3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("1 no 4", "3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("4 no 2", "3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Opus 4 no 2", "3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 141", "K. 21");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 14", "K. 21");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 6a", "Op 6");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 6a", "Op 6b");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 6a", "Op 7");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Hob XXIIId:Es1", "Hob XXII:B04");
+    std::cerr << c << std::endl;
+
+    BasicStore *store = new BasicStore();
+    store->setBaseUri(Uri("http://dbtune.org/classical/resource/"));
+    ObjectLoader *loader = new ObjectLoader(store);
+
+    TypeMapping tm;
+
+    TypeRegistrar::registerTypes();
+    TypeRegistrar::addMappings(store, &tm);
+
+    loader->setTypeMapping(tm);
+
+    if (!load(store, inRDFFileName)) {
+	cerr << "Failed to load data source" << endl;
+	return 1;
+    }
+
+    cerr << "Imported RDF data, mapping to objects...";
+    QObjectList objects = loader->loadAll();
+    cerr << " done" << endl;
+
+    delete loader;
+    
+    foreach (QObject *o, objects) {
+        Composer *c = qobject_cast<Composer *>(o);
+        if (c) {
+            allComposers.push_back(c);
+            composerAliases.insert(c->name(), c);
+            foreach (QString alias, c->aliases()) {
+                composerAliases.insert(alias, c);
+            }
+            composerUris.insert(c->property("uri").value<Uri>(), c);
+            foreach (Uri otherUri, c->otherURIs()) {
+                composerUris.insert(otherUri, c);
+            }
+        }
+    }
+    
+    QList<Work *> works;
+    foreach (QObject *o, objects) {
+        Work *w = qobject_cast<Work *>(o);
+        if (w) works.push_back(w);
+    }
+
+    foreach (Work *w, works) {
+        allWorks.push_back(w);
+        Composition *c = w->composition();
+        if (c) {
+            Composer *cp = c->composer();
+            if (cp) worksMap[cp].push_back(w);
+        }
+    }
+
+    BasicStore localStore;
+    TypeRegistrar::addMappings(&localStore, &tm);
+
+    ObjectStorer *localStorer = new ObjectStorer(&localStore);
+    localStorer->setTypeMapping(tm);
+//    localStorer->setFollowPolicy(ObjectStorer::FollowObjectProperties);
+
+    if (command == "guess") {
+        if (args.empty()) usage(argv[0]);
+        foreach (QString track, args) {
+            TrackFile *tf = guess(track);
+            localStorer->store(tf);
+        }
+    } 
+
+    delete localStorer;
+    localStore.save("local.ttl");
+        
+    delete store;
+*/
+
+    TempDirectory::getInstance()->cleanup();
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/the-application/the-application.pro	Mon May 10 17:33:00 2010 +0100
@@ -0,0 +1,14 @@
+# qmake project file for the "the-application" command-line utility.
+TEMPLATE = app
+TARGET = the-application
+#QT -= gui network xml
+
+# Feature file loaded from three directories above this one
+load(../../../all.prf)
+
+SOURCES += the-application.cpp
+
+# Relink whenever the common static library has been rebuilt
+PRE_TARGETDEPS += ../../common/libcommon.a
+
+INCLUDEPATH += ../../common ../../../svcore
+
+# Static libraries are given by path; svcore is found through -L/-l
+LIBS += ../../common/libcommon.a -L../../../svcore -lsvcore ../../../../turbot/dataquay/libdataquay.a ../../../../turbot/ext/libext.a
+