view common/FeatureFileIndex.cpp @ 48:5f23d5b29aaf

* Add original tags to AudioFile
author Chris Cannam
date Wed, 02 Jun 2010 17:29:47 +0100
parents c8b777862198
children 0f9353a69866 e0e12bd2978d
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

#include "FeatureFileIndex.h"
#include "TypeRegistrar.h"

#include <QMutexLocker>
#include <QDir>

#include "base/TempDirectory.h"
#include "base/Exceptions.h"

using namespace Dataquay;


namespace ClassicalData {

FeatureFileIndex *
FeatureFileIndex::getInstance()
{
    // A single shared index object, constructed the first time this
    // function is called and handed out to every caller thereafter.
    static FeatureFileIndex theIndex;
    FeatureFileIndex *const shared = &theIndex;
    return shared;
}

// Set up the index stores and, if an index file from a previous run
// exists, read it in.
//
// If the index directory cannot be found or created, the object is
// left with m_bs and m_index both null; the other methods test
// m_index before using it.
FeatureFileIndex::FeatureFileIndex() :
    m_bs(0),
    m_index(0)
{
    try {
        m_indexFileName = getIndexFileName();
    } catch (const DirectoryCreationFailed &f) {
        std::cerr << "FeatureFileIndex: ERROR: Failed to find or create index directory: " << f.what() << std::endl;
        return;
    }

    m_bs = new BasicStore;
    m_index = new TransactionalStore(m_bs);

    TypeRegistrar::addMappings(m_bs, 0);

    if (QFile(m_indexFileName).exists()) {
        // A damaged or unreadable index file must not escape from the
        // constructor: the destructor would then never run and both
        // stores would leak.  Carry on with whatever was imported;
        // updateIndex() re-indexes any feature file not yet recorded.
        try {
            m_bs->import(QUrl::fromLocalFile(m_indexFileName),
                         BasicStore::ImportIgnoreDuplicates);
        } catch (const std::exception &e) {
            std::cerr << "FeatureFileIndex: WARNING: Failed to import existing index from "
                      << m_indexFileName.toStdString() << ": "
                      << e.what() << std::endl;
        }
    }
}

// Release the two stores allocated by the constructor.  Both pointers
// are still null if the constructor returned early (index directory
// unavailable), in which case these deletes do nothing.
FeatureFileIndex::~FeatureFileIndex()
{
    // m_index was constructed over m_bs, so it is deleted first.
    // NOTE(review): presumably the wrapper must not outlive the store
    // it wraps -- confirm against the TransactionalStore documentation
    // before reordering.
    delete m_index;
    delete m_bs;
}

// Return the path of the index file, "features.ttl", inside an
// "index" subdirectory of the TempDirectory containing path.  The
// subdirectory is created if absent.  Throws DirectoryCreationFailed
// if something that is not a directory is in the way, or if the
// subdirectory cannot be created.
QString
FeatureFileIndex::getIndexFileName()
{
    QDir parent = TempDirectory::getInstance()->getContainingPath();
    QString subdir("index");
    QFileInfo info(parent.filePath(subdir));

    bool usable;
    if (info.exists()) {
        // Something is already there: fine only if it is a directory
        usable = info.isDir();
    } else {
        // Nothing there yet: try to make it
        usable = parent.mkdir(subdir);
    }

    if (!usable) {
        throw DirectoryCreationFailed(info.filePath());
    }

    QDir indexDir(info.filePath());
    return indexDir.filePath("features.ttl");
}

// Return the path of the "features" subdirectory of the TempDirectory
// containing path, creating it if it does not exist.  Throws
// DirectoryCreationFailed if the path exists but is not a directory,
// or if it does not exist and cannot be created.
QString
FeatureFileIndex::getFeatureDirectoryName()
{
    QDir base = TempDirectory::getInstance()->getContainingPath();
    QString leaf("features");
    QFileInfo target(base.filePath(leaf));

    if (target.exists()) {
        if (!target.isDir()) {
            throw DirectoryCreationFailed(target.filePath());
        }
    } else if (!base.mkdir(leaf)) {
        throw DirectoryCreationFailed(target.filePath());
    }

    return target.filePath();
}

// Load into the given store the contents of every indexed feature
// file that describes the given audio file.  The index is brought up
// to date first.  Does nothing (beyond logging) if there is no index.
void
FeatureFileIndex::loadFor(AudioFile *tf, Store *store)
{
    if (!m_index) {
        std::cerr << "FeatureFileIndex::loadFor: No index!" << std::endl;
        return;
    }
    updateIndex();

    // One audio file can be known under several URIs.  Collect all of
    // the ones we are prepared to accept: the URI held by the file
    // object, the percent-encoded form of that URI, and the URI of
    // any indexed file recorded with the same hash (in other words,
    // the same file under a different name).
    const Uri canonical = tf->uri();
    const QString hash = tf->hash();

    QSet<Uri> candidates;
    candidates.insert(canonical);

    // encoded form of the same URI
    const QUrl asUrl(canonical.toString());
    const QByteArray encoded = asUrl.toEncoded();
    candidates.insert(Uri(QString::fromUtf8(encoded)));

    // anything in the index sharing our hash
    if (hash != "") {
        Triple hashPattern(Node(), "foaf:sha1", Node(Node::Literal, hash));
        Triples sameHash = m_index->match(hashPattern);
        std::cerr << "FeatureFileIndex::loadFor: " << sameHash.size() << " audio file(s) found with hash " << hash.toStdString() << std::endl;
        foreach (Triple match, sameHash) {
            candidates.insert(Uri(match.a.value));
        }
    }

    // Whichever URI a feature file used, its facts are attached to
    // our own URI for the file
    foreach (Uri candidate, candidates) {
        loadFor(canonical, candidate, hash, store);
    }
}

// Import into the given store every indexed feature file whose
// primary topic is the audio file URI afuri.
//
// canonicalUri: the URI under which imported facts are recorded; any
//     occurrence of afuri as subject or object is rewritten to it.
// afuri: the audio file URI to look up in the index and in each
//     feature file.
// hash: the hash of the audio file, or an empty string if unknown.
// store: destination for the imported triples.
//
// Returns true if at least one feature file was imported.  Failures
// on individual feature files are logged and skipped; they do not
// stop the remaining files from being tried.
bool
FeatureFileIndex::loadFor(Uri canonicalUri, Uri afuri,
                          QString hash, Store *store)
{
    // The AudioFile object has a URI and a hash.  Feature files
    // generated for this AudioFile should ideally have a matching
    // hash; if they have no hash, then the URI should match.  If the
    // hash is present in the feature file but does not match, then it
    // cannot be the right track even if the URI matches.

    if (!m_index) {
        std::cerr << "FeatureFileIndex::loadFor: No index!" << std::endl;
        return false;
    }

    Triple pattern(Node(), "foaf:primaryTopic", afuri);
    Triples results = m_index->match(pattern);
    std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " feature file(s) for audio file " << afuri << std::endl;

    bool loadedSomething = false;

    foreach (Triple fileTriple, results) {

        // Declared outside the try block so that the store returned
        // by BasicStore::load is released however this iteration ends
        BasicStore *b = 0;

        try {
            b = BasicStore::load(QUrl(fileTriple.a.value));

            Triples ts = b->match
                (Triple(afuri, "a", m_index->expand("mo:AudioFile")));
            std::cerr << "FeatureFileIndex::loadFor: feature file "
                      << fileTriple.a << " has " << ts.size() << " type node(s) for this audio file" << std::endl;

            // The file is usable only if it declares afuri to be an
            // audio file at all.  The hash test below does not depend
            // on which type triple matched, so it is made once.
            bool someGood = false;

            if (!ts.empty()) {
                bool good = true;
                if (hash != "") {
                    Triples hashts = b->match
                        (Triple(afuri, m_index->expand("foaf:sha1"), Node()));
                    std::cerr << "FeatureFileIndex::loadFor: feature file "
                              << fileTriple.a << " has " << hashts.size() << " hashes for this file" << std::endl;
                    if (!hashts.empty()) {
                        good = false;
                        foreach (Triple hasht, hashts) {
                            if (hasht.c.value == hash) {
                                std::cerr << "Hash " << hasht.c << " matches our hash " << hash.toStdString() << std::endl;
                                good = true;
                                break;
                            }
                        }
                        if (!good) {
                            std::cerr << "(no hash matches, eliminating file)" << std::endl;
                        }
                    } else {
                        std::cerr << "(so cannot eliminate file via hash)" << std::endl;
                    }
                }
                if (good) {
                    std::cerr << "...going to import this one" << std::endl;
                    someGood = true;
                }
            }

            if (someGood) {
                Triples all = b->match(Triple());
                std::cerr << "Importing " << all.size() << " triple(s) into store" << std::endl;
                // Replace instances of the audio file URI with our
                // canonical URI (we want to make sure we're
                // associating these facts with our own URI for this
                // file, even if they originated from a different URI
                // with the same hash)
                Node from = Node(Node::URI, afuri.toString());
                Node to = Node(Node::URI, canonicalUri.toString());
                foreach (Triple t, all) {
                    if (t.a == from) t.a = to;
                    if (t.c == from) t.c = to;
                    store->add(t);
                }
                loadedSomething = true;
            }

        } catch (const std::exception &e) {
            // Best effort: one unreadable feature file should not
            // prevent the others from being loaded
            std::cerr << "FeatureFileIndex::loadFor: Caught exception: \""
                      << e.what() << "\" while loading feature file "
                      << fileTriple.a << ", skipping" << std::endl;
        } catch (...) {
            std::cerr << "FeatureFileIndex::loadFor: Caught unknown exception while loading feature file "
                      << fileTriple.a << ", skipping" << std::endl;
        }

        delete b;
    }

    return loadedSomething;
}

// Add the feature file at the given local filesystem path to the
// index.
void
FeatureFileIndex::featureFileAdded(QString filepath)
{
    const QUrl addedUrl = QUrl::fromLocalFile(filepath);
    index(addedUrl);
}

void
FeatureFileIndex::updateIndex()
{
    QMutexLocker locker(&m_mutex);
    if (!m_index) return;

    std::cerr << "Generating index..." << std::endl;

    QDir featureDir;
    try {
	QString s = getFeatureDirectoryName();
        featureDir = QDir(s);
    } catch (DirectoryCreationFailed f) {
        std::cerr << "FeatureFileIndex::updateIndex: ERROR: Failed to find or create feature directory: " << f.what() << std::endl;
        return;
    }

    featureDir.setFilter(QDir::Files);

    for (unsigned int i = 0; i < featureDir.count(); ++i) {
        QFileInfo fi(featureDir.filePath(featureDir[i]));
        if (fi.isFile() && fi.isReadable()) {
            index(QUrl::fromLocalFile(fi.filePath()));
        }
    }

    //!!! remove triples from index that refer to nonexistent files?

    std::cerr << "Saving index to " << m_indexFileName.toStdString() << std::endl;
    m_bs->save(m_indexFileName);

    std::cerr << "Done" << std::endl;
}

// Record the feature file at fileUrl in the index, unless the index
// already holds a foaf:Document type triple for it.
//
// For each mo:AudioFile the file describes, the index gains a
// foaf:primaryTopic triple from the feature file to that audio file,
// plus any foaf:sha1 triples the file holds for it.  If the file
// cannot be loaded, the problem is logged and whatever was added
// before the failure (at least the type triple) is still committed.
//
// Does nothing if there is no index.
void
FeatureFileIndex::index(QUrl fileUrl)
{
    // Reachable via featureFileAdded() even when the constructor
    // failed to set up the stores
    if (!m_index) return;

    const Uri fileUri(fileUrl);
    Triple typeTriple(fileUri, "a", m_index->expand("foaf:Document"));

    if (m_index->contains(typeTriple)) {
        return;
    }

    Transaction *tx = m_index->startTransaction();
    tx->add(typeTriple);

    // Declared outside the try block so that the store returned by
    // BasicStore::load is released whether or not indexing succeeds
    BasicStore *b = 0;

    try {
        b = BasicStore::load(fileUrl);
        Triples ts = b->match
            (Triple(Node(), "a", m_index->expand("mo:AudioFile")));
        foreach (Triple t, ts) {
            tx->add(Triple(fileUri, "foaf:primaryTopic", t.a));
            Triples hashts = b->match
                (Triple(t.a, m_index->expand("foaf:sha1"), Node()));
            foreach (Triple hasht, hashts) {
                tx->add(hasht);
            }
        }
    } catch (const std::exception &e) {
        std::cerr << "Caught exception: \"" << e.what() << "\" while indexing "
                  << fileUri << ", skipping" << std::endl;
    }

    delete b;

    tx->commit();
    delete tx;
}


}