# HG changeset patch # User Chris Cannam # Date 1274200804 -3600 # Node ID c8b777862198f64f11753317d9899cc32c810561 # Parent 0033259c6772c4683db4a540ad732520ce2dd510 * Fixes to cache lookup for audio file features diff -r 0033259c6772 -r c8b777862198 common/FeatureFileIndex.cpp --- a/common/FeatureFileIndex.cpp Fri May 14 17:58:04 2010 +0100 +++ b/common/FeatureFileIndex.cpp Tue May 18 17:40:04 2010 +0100 @@ -89,28 +89,107 @@ } updateIndex(); + QSet fileUris; + + // The same file may be referred to with more than one URI; we + // want to load any or all of: the URI in our file object; encoded + // version of same; and any other file that is recorded as having + // the same hash (i.e. it is the same file). + + fileUris.insert(tf->uri()); + + // and again with encoded version of file URI + QByteArray enc = QUrl(tf->uri().toString()).toEncoded(); + fileUris.insert(Uri(QString::fromUtf8(enc))); + + // and again with anything else having the same hash + if (tf->hash() != "") { + Triple pattern(Node(), "foaf:sha1", Node(Node::Literal, tf->hash())); + Triples results = m_index->match(pattern); + std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " audio file(s) found with hash " << tf->hash().toStdString() << std::endl; + foreach (Triple t, results) { + fileUris.insert(Uri(t.a.value)); + } + } + + foreach (Uri u, fileUris) { + loadFor(tf->uri(), u, tf->hash(), store); + } +} + +bool +FeatureFileIndex::loadFor(Uri canonicalUri, Uri afuri, + QString hash, Store *store) +{ // The AudioFile object has a URI and a hash. Feature files // generated for this AudioFile should ideally have a matching // hash; if they have no hash, then the URI should match. If the // hash is present in the feature file but does not match, then it // cannot be the right track even if the URI matches. - Triple t(Node(), "foaf:primaryTopic", tf->uri()); - Triples results = m_index->match(t); - std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " feature file(s) for audio file " << tf->uri() << std::endl; + Triple pattern(Node(), "foaf:primaryTopic", afuri); + Triples results = m_index->match(pattern); + std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " feature file(s) for audio file " << afuri << std::endl; - t = Triple(Node(), "foaf:primaryTopic", - Uri(QString::fromUtf8(QUrl(tf->uri().toString()).toEncoded()))); - Triples moreResults = m_index->match(t); - std::cerr << "FeatureFileIndex::loadFor: " << moreResults.size() << " feature file(s) for audio file " << t.c << std::endl; + bool loadedSomething = false; - //!!! what's the right approach here? - - if (results.empty() && moreResults.empty()) { - return; + foreach (Triple t, results) { + try { + BasicStore *b = BasicStore::load(QUrl(t.a.value)); + Triples ts = b->match + (Triple(afuri, "a", m_index->expand("mo:AudioFile"))); + std::cerr << "FeatureFileIndex::loadFor: feature file " + << t.a << " has " << ts.size() << " type node(s) for this audio file" << std::endl; + bool someGood = false; + foreach (Triple t, ts) { + bool good = true; + if (hash != "") { + Triples hashts = b->match + (Triple(afuri, m_index->expand("foaf:sha1"), Node())); + std::cerr << "FeatureFileIndex::loadFor: feature file " + << t.a << " has " << hashts.size() << " hashes for this file" << std::endl; + if (!hashts.empty()) { + good = false; + foreach (Triple hasht, hashts) { + if (hasht.c.value == hash) { + std::cerr << "Hash " << hasht.c << " matches our hash " << hash.toStdString() << std::endl; + good = true; + break; + } + } + if (!good) { + std::cerr << "(no hash matches, eliminating file)" << std::endl; + } + } else { + std::cerr << "(so cannot eliminate file via hash)" << std::endl; + } + } + if (good) { + std::cerr << "...going to import this one" << std::endl; + someGood = true; + } + } + if (someGood) { + Triples all = b->match(Triple()); + std::cerr << "Importing " << all.size() << " triple(s) into store" << std::endl; + // Replace instances of the audio file URI with our + // canonical URI (we want to make sure we're + // associating these facts with our own URI for this + // file, even if they originated from a different URI + // with the same hash) + Node from = Node(Node::URI, afuri.toString()); + Node to = Node(Node::URI, canonicalUri.toString()); + foreach (Triple t, all) { + if (t.a == from) t.a = to; + if (t.c == from) t.c = to; + store->add(t); + } + loadedSomething = true; + } + } catch (...) { } } - + return loadedSomething; } void @@ -125,6 +204,8 @@ QMutexLocker locker(&m_mutex); if (!m_index) return; + std::cerr << "Generating index..." << std::endl; + QDir featureDir; try { QString s = getFeatureDirectoryName(); @@ -147,6 +228,8 @@ std::cerr << "Saving index to " << m_indexFileName.toStdString() << std::endl; m_bs->save(m_indexFileName); + + std::cerr << "Done" << std::endl; } void @@ -167,8 +250,16 @@ (Triple(Node(), "a", m_index->expand("mo:AudioFile"))); foreach (Triple t, ts) { tx->add(Triple(Uri(fileUrl), "foaf:primaryTopic", t.a));; + Triples hashts = b->match + (Triple(t.a, m_index->expand("foaf:sha1"), Node())); + foreach (Triple hasht, hashts) { + tx->add(hasht); + } } - } catch (...) { } + } catch (std::exception &e) { + std::cerr << "Caught exception: \"" << e.what() << "\" while indexing " + << Uri(fileUrl) << ", skipping" << std::endl; + } delete tx; } diff -r 0033259c6772 -r c8b777862198 common/FeatureFileIndex.h --- a/common/FeatureFileIndex.h Fri May 14 17:58:04 2010 +0100 +++ b/common/FeatureFileIndex.h Tue May 18 17:40:04 2010 +0100 @@ -34,6 +34,8 @@ Dataquay::TransactionalStore *m_index; QString getIndexFileName(); QString getFeatureDirectoryName(); + bool loadFor(Dataquay::Uri canonicalUri, Dataquay::Uri actingUri, + QString hash, Dataquay::Store *); void updateIndex(); void index(QUrl); }; diff -r 0033259c6772 -r c8b777862198 utilities/the-application/the-application.cpp --- a/utilities/the-application/the-application.cpp Fri May 14 17:58:04 2010 +0100 +++ b/utilities/the-application/the-application.cpp Tue May 18 17:40:04 2010 +0100 @@ -648,6 +648,8 @@ } } + std::cerr << "Dumping out our local store to local2.ttl" << std::endl; + bs.save("local2.ttl"); /* BasicStore *index = new BasicStore;