changeset 610:e21a3db643af

MORE MEMORY SANITY
Move the logic tracking which points have already been visited (including the std::set data structure) into the indexed query codepaths, rather than keeping it inside the accumulators. This drastically reduces the memory used in non-indexed queries, such that the working set for a 500-file database with 100000 vectors total goes from 1.2GB to slightly under 3MB. All this and less code, too!
author mas01cr
date Fri, 28 Aug 2009 17:14:06 +0000
parents 368320b31db6
children 957dd4fb2599
files audioDB-internals.h dbaccumulator.h nearestaccumulator.h pertrackaccumulator.h query-indexed.cpp query.cpp
diffstat 6 files changed, 55 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB-internals.h	Fri Aug 21 15:23:32 2009 +0000
+++ b/audioDB-internals.h	Fri Aug 28 17:14:06 2009 +0000
@@ -67,21 +67,6 @@
   double *mean_duration;
 } adb_qpointers_internal_t;
 
-/* this struct is for maintaining per-query state.  We don't want to
- * store this stuff in the adb struct itself, because (a) it doesn't
- * belong there and (b) in principle people might do two queries in
- * parallel using the same adb handle.  (b) is in practice a little
- * bit academic because at the moment we're seeking all over the disk
- * using adb->fd, but changing to use pread() might win us
- * threadsafety eventually.
- */
-typedef struct adb_qstate_internal {
-  Accumulator *accumulator;
-  std::set<std::string> *allowed_keys;
-  std::priority_queue<PointPair> *exact_evaluation_queue;
-  LSH *lsh;
-} adb_qstate_internal_t;
-
 /* this struct is the in-memory representation of the binary
  * information stored at the head of each adb file */
 typedef struct adbheader {
@@ -168,6 +153,22 @@
   }
 } adb_result_triple_lt;
 
+/* this struct is for maintaining per-query state.  We don't want to
+ * store this stuff in the adb struct itself, because (a) it doesn't
+ * belong there and (b) in principle people might do two queries in
+ * parallel using the same adb handle.  (b) is in practice a little
+ * bit academic because at the moment we're seeking all over the disk
+ * using adb->fd, but changing to use pread() might win us
+ * threadsafety eventually.
+ */
+typedef struct adb_qstate_internal {
+  Accumulator *accumulator;
+  std::set<std::string> *allowed_keys;
+  std::priority_queue<PointPair> *exact_evaluation_queue;
+  std::set< adb_result_t, adb_result_triple_lt > *set;
+  LSH *lsh;
+} adb_qstate_internal_t;
+
 /* We could go gcc-specific here and use typeof() instead of passing
  * in an explicit type.  Answers on a postcard as to whether that's a
  * good plan or not. */
--- a/dbaccumulator.h	Fri Aug 21 15:23:32 2009 +0000
+++ b/dbaccumulator.h	Fri Aug 28 17:14:06 2009 +0000
@@ -7,32 +7,24 @@
 private:
   unsigned int pointNN;
   std::priority_queue< adb_result_t, std::vector<adb_result_t>, T > *queue;
-  std::set< adb_result_t, adb_result_triple_lt > *set;
 };
 
 template <class T> DBAccumulator<T>::DBAccumulator(unsigned int pointNN)
-  : pointNN(pointNN), queue(0), set(0) {
+  : pointNN(pointNN), queue(0) {
   queue = new std::priority_queue< adb_result_t, std::vector<adb_result_t>, T>;
-  set = new std::set<adb_result_t, adb_result_triple_lt>;
 }
 
 template <class T> DBAccumulator<T>::~DBAccumulator() {
   if(queue) {
     delete queue;
   }
-  if(set) {
-    delete set;
-  }
 }
 
 template <class T> void DBAccumulator<T>::add_point(adb_result_t *r) {
   if(!isnan(r->dist)) {
-    if(set->find(*r) == set->end()) {
-      set->insert(*r);
-      queue->push(*r);
-      if(queue->size() > pointNN) {
-        queue->pop();
-      }
+    queue->push(*r);
+    if(queue->size() > pointNN) {
+      queue->pop();
     }
   }
 }
--- a/nearestaccumulator.h	Fri Aug 21 15:23:32 2009 +0000
+++ b/nearestaccumulator.h	Fri Aug 28 17:14:06 2009 +0000
@@ -5,20 +5,15 @@
   void add_point(adb_result_t *r);
   adb_query_results_t *get_points();
 private:
-  std::set< adb_result_t, adb_result_triple_lt > *set;
   std::set< adb_result_t, adb_result_qpos_lt > *points;
 };
 
 template <class T> NearestAccumulator<T>::NearestAccumulator()
-  : set(0), points(0) {
-  set = new std::set< adb_result_t, adb_result_triple_lt >;
+  : points(0) {
   points = new std::set< adb_result_t, adb_result_qpos_lt >;
 }
 
 template <class T> NearestAccumulator<T>::~NearestAccumulator() {
-  if(set) {
-    delete set;
-  }
   if(points) {
     delete points;
   }
@@ -26,17 +21,13 @@
 
 template <class T> void NearestAccumulator<T>::add_point(adb_result_t *r) {
   if(!isnan(r->dist)) {
-    if(set->find(*r) == set->end()) {
-      set->insert(*r);
-
-      std::set< adb_result_t, adb_result_qpos_lt >::iterator it;
-      it = points->find(*r);
-      if(it == points->end()) {
-        points->insert(*r);
-      } else if(T()(*(const adb_result_t *)r,(*it))) {
-        points->erase(it);
-        points->insert(*r);
-      }
+    std::set< adb_result_t, adb_result_qpos_lt >::iterator it;
+    it = points->find(*r);
+    if(it == points->end()) {
+      points->insert(*r);
+    } else if(T()(*(const adb_result_t *)r,(*it))) {
+      points->erase(it);
+      points->insert(*r);
     }
   }
 }
--- a/pertrackaccumulator.h	Fri Aug 21 15:23:32 2009 +0000
+++ b/pertrackaccumulator.h	Fri Aug 28 17:14:06 2009 +0000
@@ -8,13 +8,11 @@
   unsigned int pointNN;
   unsigned int trackNN;
   std::map<adb_result_t, std::priority_queue< adb_result_t, std::vector<adb_result_t>, T > *, adb_result_key_lt> *queues;
-  std::set< adb_result_t, adb_result_triple_lt > *set;
 };
 
 template <class T> PerTrackAccumulator<T>::PerTrackAccumulator(unsigned int pointNN, unsigned int trackNN)
-  : pointNN(pointNN), trackNN(trackNN), queues(0), set(0) {
+  : pointNN(pointNN), trackNN(trackNN), queues(0) {
   queues = new std::map<adb_result_t, std::priority_queue< adb_result_t, std::vector<adb_result_t>, T > *, adb_result_key_lt>;
-  set = new std::set< adb_result_t, adb_result_triple_lt >;
 }
 
 template <class T> PerTrackAccumulator<T>::~PerTrackAccumulator() {
@@ -25,30 +23,23 @@
     }
     delete queues;
   }
-  if(set) {
-    delete set;
-  }
 }
 
 template <class T> void PerTrackAccumulator<T>::add_point(adb_result_t *r) {
   if(!isnan(r->dist)) {
-    if(set->find(*r) == set->end()) {
-      set->insert(*r);
-
-      typename std::map< adb_result_t, std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *, adb_result_key_lt>::iterator it;
-      std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *queue;
-      it = queues->find(*r);
-      if(it == queues->end()) {
-        queue = new std::priority_queue< adb_result_t, std::vector< adb_result_t >, T >;
-        (*queues)[*r] = queue;
-      } else {
-        queue = (*it).second;
-      }
-
-      queue->push(*r);
-      if(queue->size() > pointNN) {
-        queue->pop();
-      }
+    typename std::map< adb_result_t, std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *, adb_result_key_lt>::iterator it;
+    std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *queue;
+    it = queues->find(*r);
+    if(it == queues->end()) {
+      queue = new std::priority_queue< adb_result_t, std::vector< adb_result_t >, T >;
+      (*queues)[*r] = queue;
+    } else {
+      queue = (*it).second;
+    }
+    
+    queue->push(*r);
+    if(queue->size() > pointNN) {
+      queue->pop();
     }
   }
 }
--- a/query-indexed.cpp	Fri Aug 21 15:23:32 2009 +0000
+++ b/query-indexed.cpp	Fri Aug 28 17:14:06 2009 +0000
@@ -54,7 +54,10 @@
     r.dist = dist;
     r.qpos = qpos;
     r.ipos = spos;
-    qstate->accumulator->add_point(&r);
+    if(qstate->set->find(r) == qstate->set->end()) {
+      qstate->set->insert(r);
+      qstate->accumulator->add_point(&r);
+    }
   }
 }
 
@@ -95,6 +98,8 @@
   bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD;
   double absolute_threshold = spec->refine.absolute_threshold;
 
+  qstate->set = new std::set< adb_result_t, adb_result_triple_lt >;
+
   if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) {
     add_point_func = &audiodb_index_add_point_approximate;  
   } else {
@@ -163,6 +168,9 @@
   if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) {
     audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers);
   }
+
+  delete qstate->set;
+
   
  // Clean up
   if(query_data)
--- a/query.cpp	Fri Aug 21 15:23:32 2009 +0000
+++ b/query.cpp	Fri Aug 28 17:14:06 2009 +0000
@@ -477,7 +477,10 @@
         r.dist = dist;
         r.qpos = pp.qpos;
         r.ipos = pp.spos;
-        qstate->accumulator->add_point(&r);
+	if(qstate->set->find(r) == qstate->set->end()) {
+	  qstate->set->insert(r);
+	  qstate->accumulator->add_point(&r);
+	}
       }
     }
     qstate->exact_evaluation_queue->pop();