changeset 46:c8b777862198

* Fixes to cache lookup for audio file features
author Chris Cannam
date Tue, 18 May 2010 17:40:04 +0100
parents 0033259c6772
children 273bd328b215
files common/FeatureFileIndex.cpp common/FeatureFileIndex.h utilities/the-application/the-application.cpp
diffstat 3 files changed, 108 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/common/FeatureFileIndex.cpp	Fri May 14 17:58:04 2010 +0100
+++ b/common/FeatureFileIndex.cpp	Tue May 18 17:40:04 2010 +0100
@@ -89,28 +89,107 @@
     }
     updateIndex();
 
+    QSet<Uri> fileUris;
+
+    // The same file may be referred to with more than one URI; we
+    // want to load any or all of: the URI in our file object; encoded
+    // version of same; and any other file that is recorded as having
+    // the same hash (i.e. it is the same file).
+
+    fileUris.insert(tf->uri());
+
+    // and again with encoded version of file URI
+    QByteArray enc = QUrl(tf->uri().toString()).toEncoded();
+    fileUris.insert(Uri(QString::fromUtf8(enc)));
+
+    // and again with anything else having the same hash
+    if (tf->hash() != "") {
+        Triple pattern(Node(), "foaf:sha1", Node(Node::Literal, tf->hash()));
+        Triples results = m_index->match(pattern);
+        std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " audio file(s) found with hash " << tf->hash().toStdString() << std::endl;
+        foreach (Triple t, results) {
+            fileUris.insert(Uri(t.a.value));
+        }
+    }
+
+    foreach (Uri u, fileUris) {
+        loadFor(tf->uri(), u, tf->hash(), store);
+    }
+}
+
+bool
+FeatureFileIndex::loadFor(Uri canonicalUri, Uri afuri,
+                          QString hash, Store *store)
+{
     // The AudioFile object has a URI and a hash.  Feature files
     // generated for this AudioFile should ideally have a matching
     // hash; if they have no hash, then the URI should match.  If the
     // hash is present in the feature file but does not match, then it
     // cannot be the right track even if the URI matches.
 
-    Triple t(Node(), "foaf:primaryTopic", tf->uri());
-    Triples results = m_index->match(t);
-    std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " feature file(s) for audio file " << tf->uri() << std::endl;
+    Triple pattern(Node(), "foaf:primaryTopic", afuri);
+    Triples results = m_index->match(pattern);
+    std::cerr << "FeatureFileIndex::loadFor: " << results.size() << " feature file(s) for audio file " << afuri << std::endl;
 
-    t = Triple(Node(), "foaf:primaryTopic",
-               Uri(QString::fromUtf8(QUrl(tf->uri().toString()).toEncoded())));
-    Triples moreResults = m_index->match(t);
-    std::cerr << "FeatureFileIndex::loadFor: " << moreResults.size() << " feature file(s) for audio file " << t.c << std::endl;
+    bool loadedSomething = false;
 
-    //!!! what's the right approach here?
-
-    if (results.empty() && moreResults.empty()) {
-        return;
+    foreach (Triple t, results) {
+        try {
+            BasicStore *b = BasicStore::load(QUrl(t.a.value));
+            Triples ts = b->match
+                (Triple(afuri, "a", m_index->expand("mo:AudioFile")));
+            std::cerr << "FeatureFileIndex::loadFor: feature file "
+                      << t.a << " has " << ts.size() << " type node(s) for this audio file" << std::endl;
+            bool someGood = false;
+            foreach (Triple t, ts) {
+                bool good = true;
+                if (hash != "") {
+                    Triples hashts = b->match
+                        (Triple(afuri, m_index->expand("foaf:sha1"), Node()));
+                    std::cerr << "FeatureFileIndex::loadFor: feature file "
+                              << t.a << " has " << hashts.size() << " hashes for this file" << std::endl;
+                    if (!hashts.empty()) {
+                        good = false;
+                        foreach (Triple hasht, hashts) {
+                            if (hasht.c.value == hash) {
+                                std::cerr << "Hash " << hasht.c << " matches our hash " << hash.toStdString() << std::endl;
+                                good = true;
+                                break;
+                            }
+                        }
+                        if (!good) {
+                            std::cerr << "(no hash matches, eliminating file)" << std::endl;
+                        }
+                    } else {
+                        std::cerr << "(so cannot eliminate file via hash)" << std::endl;
+                    }
+                }
+                if (good) {
+                    std::cerr << "...going to import this one" << std::endl;
+                    someGood = true;
+                }
+            }
+            if (someGood) {
+                Triples all = b->match(Triple());
+                std::cerr << "Importing " << all.size() << " triple(s) into store" << std::endl;
+                // Replace instances of the audio file URI with our
+                // canonical URI (we want to make sure we're
+                // associating these facts with our own URI for this
+                // file, even if they originated from a different URI
+                // with the same hash)
+                Node from = Node(Node::URI, afuri.toString());
+                Node to = Node(Node::URI, canonicalUri.toString());
+                foreach (Triple t, all) {
+                    if (t.a == from) t.a = to;
+                    if (t.c == from) t.c = to;
+                    store->add(t);
+                }
+                loadedSomething = true;
+            }
+        } catch (...) { }
     }
 
-    
+    return loadedSomething;
 }
 
 void
@@ -125,6 +204,8 @@
     QMutexLocker locker(&m_mutex);
     if (!m_index) return;
 
+    std::cerr << "Generating index..." << std::endl;
+
     QDir featureDir;
     try {
 	QString s = getFeatureDirectoryName();
@@ -147,6 +228,8 @@
 
     std::cerr << "Saving index to " << m_indexFileName.toStdString() << std::endl;
     m_bs->save(m_indexFileName);
+
+    std::cerr << "Done" << std::endl;
 }
 
 void
@@ -167,8 +250,16 @@
             (Triple(Node(), "a", m_index->expand("mo:AudioFile")));
         foreach (Triple t, ts) {
             tx->add(Triple(Uri(fileUrl), "foaf:primaryTopic", t.a));;
+            Triples hashts = b->match
+                (Triple(t.a, m_index->expand("foaf:sha1"), Node()));
+            foreach (Triple hasht, hashts) {
+                tx->add(hasht);
+            }
         }
-    } catch (...) { }
+    } catch (std::exception &e) {
+        std::cerr << "Caught exception: \"" << e.what() << "\" while indexing "
+                  << Uri(fileUrl) << ", skipping" << std::endl;
+    }
 
     delete tx;
 }
--- a/common/FeatureFileIndex.h	Fri May 14 17:58:04 2010 +0100
+++ b/common/FeatureFileIndex.h	Tue May 18 17:40:04 2010 +0100
@@ -34,6 +34,8 @@
     Dataquay::TransactionalStore *m_index;
     QString getIndexFileName();
     QString getFeatureDirectoryName();
+    bool loadFor(Dataquay::Uri canonicalUri, Dataquay::Uri actingUri,
+                 QString hash, Dataquay::Store *);
     void updateIndex();
     void index(QUrl);
 };
--- a/utilities/the-application/the-application.cpp	Fri May 14 17:58:04 2010 +0100
+++ b/utilities/the-application/the-application.cpp	Tue May 18 17:40:04 2010 +0100
@@ -648,6 +648,8 @@
         }
     } 
 
+    std::cerr << "Dumping out our local store to local2.ttl" << std::endl;
+    bs.save("local2.ttl");
     
 /*
     BasicStore *index = new BasicStore;