Mercurial > hg > classical
view common/FeatureFileIndex.cpp @ 50:0f9353a69866 nodes_have_hashes
Adjustments to accommodate changes in dataquay nodes_have_hashes code
author | Chris Cannam |
---|---|
date | Mon, 28 Mar 2011 12:47:39 +0100 |
parents | 5f23d5b29aaf |
children |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

#include "FeatureFileIndex.h"
#include "TypeRegistrar.h"

#include <QMutexLocker>
#include <QDir>
#include <QFile>
#include <QFileInfo>
#include <QScopedPointer>

#include <exception>
#include <iostream>

#include "base/TempDirectory.h"
#include "base/Exceptions.h"

using namespace Dataquay;

namespace ClassicalData {

/**
 * Return the single shared FeatureFileIndex.  The instance is a
 * function-local static, so it is constructed on the first call.
 */
FeatureFileIndex *
FeatureFileIndex::getInstance()
{
    static FeatureFileIndex instance;
    return &instance;
}

/**
 * Work out where the index file lives, create the backing stores and,
 * if an index file already exists, import it.
 *
 * If the index directory cannot be found or created, m_bs and m_index
 * are left null; every other method checks for that and does nothing.
 */
FeatureFileIndex::FeatureFileIndex() :
    m_bs(0),
    m_index(0)
{
    try {
        m_indexFileName = getIndexFileName();
    } catch (const DirectoryCreationFailed &f) {
        std::cerr << "FeatureFileIndex: ERROR: Failed to find or create index directory: " << f.what() << std::endl;
        return;
    }

    m_bs = new BasicStore;
    m_index = new TransactionalStore(m_bs);

    TypeRegistrar::addMappings(m_bs, 0);

    if (QFile(m_indexFileName).exists()) {
        // An exception escaping this constructor would leak m_bs and
        // m_index (the destructor does not run for a partially
        // constructed object), so an unreadable index file is reported
        // and otherwise ignored.  updateIndex() re-indexes any feature
        // file that has no entry in the store.
        try {
            m_bs->import(QUrl::fromLocalFile(m_indexFileName),
                         BasicStore::ImportIgnoreDuplicates);
        } catch (const std::exception &e) {
            std::cerr << "FeatureFileIndex: WARNING: Failed to import existing index from "
                      << m_indexFileName.toStdString() << ": " << e.what()
                      << std::endl;
        }
    }
}

FeatureFileIndex::~FeatureFileIndex()
{
    delete m_index;
    delete m_bs;
}

/**
 * Return the path of the index file ("features.ttl" inside an "index"
 * directory under the TempDirectory containing path), creating the
 * "index" directory if it does not exist.
 *
 * Throws DirectoryCreationFailed if the path exists but is not a
 * directory, or if the directory cannot be created.
 */
QString
FeatureFileIndex::getIndexFileName()
{
    QDir d = TempDirectory::getInstance()->getContainingPath();
    QString n("index");
    QFileInfo fi(d.filePath(n));

    if ((fi.exists() && !fi.isDir()) ||
        (!fi.exists() && !d.mkdir(n))) {
        throw DirectoryCreationFailed(fi.filePath());
    }

    return QDir(fi.filePath()).filePath("features.ttl");
}

/**
 * Return the path of the "features" directory under the TempDirectory
 * containing path, creating it if it does not exist.
 *
 * Throws DirectoryCreationFailed if the path exists but is not a
 * directory, or if the directory cannot be created.
 */
QString
FeatureFileIndex::getFeatureDirectoryName()
{
    QDir d = TempDirectory::getInstance()->getContainingPath();
    QString n("features");
    QFileInfo fi(d.filePath(n));

    if ((fi.exists() && !fi.isDir()) ||
        (!fi.exists() && !d.mkdir(n))) {
        throw DirectoryCreationFailed(fi.filePath());
    }

    return fi.filePath();
}

/**
 * Refresh the index, then load into the given store the contents of
 * every indexed feature file that describes the given audio file.
 */
void
FeatureFileIndex::loadFor(AudioFile *tf, Store *store)
{
    if (!m_index) {
        std::cerr << "FeatureFileIndex::loadFor: No index!" << std::endl;
        return;
    }

    updateIndex();

    // The same file may be referred to with more than one URI; we
    // want to load any or all of: the URI in our file object; encoded
    // version of same; and any other file that is recorded as having
    // the same hash (i.e. it is the same file).
    QSet<Uri> fileUris;

    fileUris.insert(tf->uri());

    // and again with encoded version of file URI
    QByteArray enc = QUrl(tf->uri().toString()).toEncoded();
    fileUris.insert(Uri(QString::fromUtf8(enc)));

    // and again with anything else having the same hash
    if (tf->hash() != "") {
        Triple pattern(Node(), "foaf:sha1", Node(Node::Literal, tf->hash()));
        Triples results = m_index->match(pattern);
        std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " audio file(s) found with hash " << tf->hash().toStdString() << std::endl;
        foreach (Triple t, results) {
            fileUris.insert(Uri(t.a.value()));
        }
    }

    foreach (Uri u, fileUris) {
        loadFor(tf->uri(), u, tf->hash(), store);
    }
}

/**
 * Load into the given store every indexed feature file whose primary
 * topic is afuri, rewriting occurrences of afuri to canonicalUri on
 * the way in.  A feature file that records hashes for afuri is only
 * accepted if one of them equals the given (non-empty) hash.
 *
 * Returns true if at least one feature file was imported.  Failures
 * on individual feature files are logged and that file is skipped.
 */
bool
FeatureFileIndex::loadFor(Uri canonicalUri, Uri afuri, QString hash,
                          Store *store)
{
    // The AudioFile object has a URI and a hash.  Feature files
    // generated for this AudioFile should ideally have a matching
    // hash; if they have no hash, then the URI should match.  If the
    // hash is present in the feature file but does not match, then it
    // cannot be the right track even if the URI matches.

    if (!m_index) {
        return false;
    }

    Triple pattern(Node(), "foaf:primaryTopic", afuri);
    Triples results = m_index->match(pattern);
    std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " feature file(s) for audio file " << afuri << std::endl;

    bool loadedSomething = false;

    foreach (Triple t, results) {

        try {
            // The loaded store is ours to dispose of (see the Dataquay
            // BasicStore::load documentation); the scoped pointer
            // releases it on every path out of this iteration,
            // including exceptions.
            QScopedPointer<BasicStore> b(BasicStore::load(QUrl(t.a.value())));

            Triples ts = b->match
                (Triple(afuri, "a", m_index->expand("mo:AudioFile")));
            std::cerr << "FeatureFileIndex::loadFor: feature file " << t.a << " has " << ts.size() << " type node(s) for this audio file" << std::endl;

            // The file is only a candidate if it types afuri as an
            // audio file at all.  The hash test below does not depend
            // on which type triple matched, so it is done once.
            bool good = !ts.empty();

            if (good && hash != "") {
                Triples hashts = b->match
                    (Triple(afuri, m_index->expand("foaf:sha1"), Node()));
                std::cerr << "FeatureFileIndex::loadFor: feature file " << t.a << " has " << hashts.size() << " hashes for this file" << std::endl;
                if (!hashts.empty()) {
                    good = false;
                    foreach (Triple hasht, hashts) {
                        if (hasht.c.value() == hash) {
                            std::cerr << "Hash " << hasht.c << " matches our hash " << hash.toStdString() << std::endl;
                            good = true;
                            break;
                        }
                    }
                    if (!good) {
                        std::cerr << "(no hash matches, eliminating file)" << std::endl;
                    }
                } else {
                    std::cerr << "(so cannot eliminate file via hash)" << std::endl;
                }
            }

            if (good) {

                std::cerr << "...going to import this one" << std::endl;

                Triples all = b->match(Triple());
                std::cerr << "Importing " << all.size() << " triple(s) into store" << std::endl;

                // Replace instances of the audio file URI with our
                // canonical URI (we want to make sure we're
                // associating these facts with our own URI for this
                // file, even if they originated from a different URI
                // with the same hash)
                Node from = Node(Node::URI, afuri.toString());
                Node to = Node(Node::URI, canonicalUri.toString());

                foreach (Triple fact, all) {
                    if (fact.a == from) fact.a = to;
                    if (fact.c == from) fact.c = to;
                    store->add(fact);
                }

                loadedSomething = true;
            }

        } catch (const std::exception &e) {
            // Best effort: one bad feature file must not stop the
            // others from loading, but say what went wrong.
            std::cerr << "FeatureFileIndex::loadFor: Caught exception: \""
                      << e.what() << "\" while loading feature file "
                      << t.a << ", skipping" << std::endl;
        } catch (...) {
            std::cerr << "FeatureFileIndex::loadFor: Caught unknown exception while loading feature file "
                      << t.a << ", skipping" << std::endl;
        }
    }

    return loadedSomething;
}

/**
 * Add a newly written feature file to the in-memory index.  Does
 * nothing if no index is available.
 */
void
FeatureFileIndex::featureFileAdded(QString filepath)
{
    index(QUrl::fromLocalFile(filepath));
}

/**
 * Index every readable regular file in the feature directory that is
 * not yet indexed, then save the index to m_indexFileName.  Runs with
 * m_mutex held.
 */
void
FeatureFileIndex::updateIndex()
{
    QMutexLocker locker(&m_mutex);

    if (!m_index) return;

    std::cerr << "Generating index..." << std::endl;

    QDir featureDir;
    try {
        QString s = getFeatureDirectoryName();
        featureDir = QDir(s);
    } catch (const DirectoryCreationFailed &f) {
        std::cerr << "FeatureFileIndex::updateIndex: ERROR: Failed to find or create feature directory: " << f.what() << std::endl;
        return;
    }

    featureDir.setFilter(QDir::Files);

    for (unsigned int i = 0; i < featureDir.count(); ++i) {
        QFileInfo fi(featureDir.filePath(featureDir[i]));
        if (fi.isFile() && fi.isReadable()) {
            index(QUrl::fromLocalFile(fi.filePath()));
        }
    }

    //!!! remove triples from index that refer to nonexistent files?

    std::cerr << "Saving index to " << m_indexFileName.toStdString() << std::endl;
    m_bs->save(m_indexFileName);
    std::cerr << "Done" << std::endl;
}

/**
 * Record the given feature file in the index, unless it is already
 * there: a foaf:Document type triple for the file, a foaf:primaryTopic
 * triple for each mo:AudioFile it describes, and that audio file's
 * foaf:sha1 triples.
 *
 * If the file cannot be loaded, the failure is logged and whatever
 * was added so far (at least the type triple) is still committed, so
 * the file is not retried on the next pass.
 */
void
FeatureFileIndex::index(QUrl fileUrl)
{
    // featureFileAdded() can reach here even when the constructor
    // failed to set up an index.
    if (!m_index) return;

    Triple typeTriple(Uri(fileUrl), "a", m_index->expand("foaf:Document"));

    if (m_index->contains(typeTriple)) {
        return;
    }

    Transaction *tx = m_index->startTransaction();
    tx->add(typeTriple);

    try {
        // Scoped so that the loaded store is released whether or not
        // anything below throws.
        QScopedPointer<BasicStore> b(BasicStore::load(fileUrl));

        Triples ts = b->match
            (Triple(Node(), "a", m_index->expand("mo:AudioFile")));

        foreach (Triple t, ts) {
            tx->add(Triple(Uri(fileUrl), "foaf:primaryTopic", t.a));
            Triples hashts = b->match
                (Triple(t.a, m_index->expand("foaf:sha1"), Node()));
            foreach (Triple hasht, hashts) {
                tx->add(hasht);
            }
        }
    } catch (const std::exception &e) {
        std::cerr << "Caught exception: \"" << e.what() << "\" while indexing " << Uri(fileUrl) << ", skipping" << std::endl;
    }

    tx->commit();
    delete tx;
}

}