changeset 528:561339c7fd26 multiprobeLSH

Added audiodb_datum_qpointers_partial() to make exact evaluation of LSH results more efficient. This routine computes the L2-norm and power partial sums only for the vectors referenced by the exact-evaluation queue, rather than for every vector in the track.
author mas01mc
date Thu, 29 Jan 2009 13:44:47 +0000
parents 7ee6a2701d90
children e532666226bc
files Makefile audioDB-internals.h query-indexed.cpp query.cpp
diffstat 4 files changed, 56 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Wed Jan 28 18:55:46 2009 +0000
+++ b/Makefile	Thu Jan 29 13:44:47 2009 +0000
@@ -17,19 +17,17 @@
 MINORVERSION=0
 LIBRARY=lib$(EXECUTABLE).so.$(SOVERSION).$(MINORVERSION)
 
-override CFLAGS+=-ggdb -g -fPIC
+override CFLAGS+=-O3 -fPIC -pg
 
 # set to DUMP hashtables on QUERY load
 #override CFLAGS+=-DLSH_DUMP_CORE_TABLES
 
 # set to turn on debugging information for LSH hashtables
-override CFLAGS+=-D_LSH_DEBUG_
+#override CFLAGS+=-D_LSH_DEBUG_
 
 # set to increase multiple probes in LSH QUERY (allowable range = 1 ... lsh_k*2)
 #override CFLAGS+=-DLSH_MULTI_PROBE_COUNT=10
 
-
-
 ifeq ($(shell uname),Linux)
 override CFLAGS+=-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 endif
--- a/audioDB-internals.h	Wed Jan 28 18:55:46 2009 +0000
+++ b/audioDB-internals.h	Thu Jan 29 13:44:47 2009 +0000
@@ -297,6 +297,7 @@
 int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *);
 int audiodb_free_datum(adb_datum_t *);
 int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *);
+int audiodb_datum_qpointers_partial(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *, adb_qstate_internal_t *);
 int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
 int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *);
 int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
--- a/query-indexed.cpp	Wed Jan 28 18:55:46 2009 +0000
+++ b/query-indexed.cpp	Thu Jan 29 13:44:47 2009 +0000
@@ -115,21 +115,11 @@
     return -1;
   }
 
-#ifdef _LSH_DEBUG_
-  cout << "spec_qpointers...";
-  cout.flush();
-#endif
-
   if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
     delete [] database;
     return -1;
   }
 
-#ifdef _LSH_DEBUG_
-  cout << "done" << endl;
-  cout.flush();
-#endif
-
   uint32_t Nq = (qpointers.nvectors > ADB_LSH_MAXTRACKLEN ? ADB_LSH_MAXTRACKLEN : qpointers.nvectors) - sequence_length + 1;
   std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length);
 
--- a/query.cpp	Wed Jan 28 18:55:46 2009 +0000
+++ b/query.cpp	Thu Jan 29 13:44:47 2009 +0000
@@ -280,6 +280,58 @@
   return 0;
 }
 
+/**
+ * Build the query pointers for a datum, filling in only the regions that the
+ * pending exact-evaluation candidates reference.
+ *
+ * A private copy of qstate->exact_evaluation_queue is walked, so the queue
+ * owned by the caller is not modified.  The run of entries at the top of that
+ * copy which share the top entry's trackID is consumed; for each entry,
+ * sequence_length vectors starting at the entry's spos are copied from
+ * d->data and the l2norm / power values at spos are computed.
+ *
+ * @param d                datum whose data, power and times arrays are read
+ * @param sequence_length  number of consecutive vectors per candidate
+ * @param vector_data      out: buffer of nvectors * dim doubles (new[])
+ * @param vector           out: set to *vector_data
+ * @param qpointers        out: nvectors, l2norm_data/l2norm, power_data/power
+ *                         (power_data is only allocated when d->power is set)
+ *                         and mean_duration (only when d->times is set)
+ * @param qstate           supplies exact_evaluation_queue
+ * @return always 0
+ *
+ * Regions of the output buffers that no candidate references are left
+ * uninitialised.
+ */
+int audiodb_datum_qpointers_partial(adb_datum_t *d, uint32_t sequence_length, double **vector_data, 
+				    double **vector, adb_qpointers_internal_t *qpointers, 
+				    adb_qstate_internal_t *qstate){
+  uint32_t nvectors = d->nvectors;
+  qpointers->nvectors = nvectors;
+  std::priority_queue<PointPair> ppairs(*qstate->exact_evaluation_queue);
+
+  // new double[n] takes an element count, so the sizes below are counts of
+  // doubles (previously a byte count was passed, over-allocating each buffer
+  // by a factor of sizeof(double)).
+  *vector_data = new double[static_cast<size_t>(nvectors) * d->dim];
+  qpointers->l2norm_data = new double[nvectors];
+  if(d->power)
+    qpointers->power_data = new double[nvectors];
+
+  // Byte counts for one candidate: sequence_length vectors of dim doubles for
+  // the feature data, but only one double per vector for the power track.
+  const size_t seq_data_bytes = static_cast<size_t>(sequence_length) * d->dim * sizeof(double);
+  const size_t seq_power_bytes = static_cast<size_t>(sequence_length) * sizeof(double);
+
+  // top() on an empty priority_queue is undefined, so check before peeking.
+  if(!ppairs.empty()) {
+    const uint32_t tid = ppairs.top().trackID;
+
+    while(!ppairs.empty() && ppairs.top().trackID == tid) {
+      const uint32_t spos = ppairs.top().spos;
+      const size_t spos_dim = static_cast<size_t>(spos) * d->dim;
+
+      memcpy(*vector_data+spos_dim, d->data+spos_dim, seq_data_bytes);
+
+      // NOTE(review): the norm is requested for a single vector, yet the sum
+      // below spans sequence_length entries starting at spos -- confirm
+      // against the helper definitions whether the count should be
+      // sequence_length.
+      audiodb_l2norm_buffer(*vector_data+spos_dim, d->dim, 1, qpointers->l2norm_data+spos);
+      audiodb_sequence_sum(qpointers->l2norm_data+spos, sequence_length, sequence_length);
+      audiodb_sequence_sqrt(qpointers->l2norm_data+spos, sequence_length, sequence_length);
+
+      if(d->power) {
+        memcpy(qpointers->power_data+spos, d->power+spos, seq_power_bytes);
+        audiodb_sequence_sum(qpointers->power_data+spos, sequence_length, sequence_length);
+        audiodb_sequence_average(qpointers->power_data+spos, sequence_length, sequence_length);
+      }
+      ppairs.pop();
+    }
+  }
+
+  if(d->times) {
+    // times holds a pair of values per vector; average their differences.
+    qpointers->mean_duration = new double[1];
+    *qpointers->mean_duration = 0;
+    for(unsigned int k = 0; k < nvectors; k++) {
+      *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
+    }
+    *qpointers->mean_duration /= nvectors;
+  }
+
+  *vector = *vector_data;
+  qpointers->l2norm = qpointers->l2norm_data;
+  qpointers->power = qpointers->power_data;
+  return 0;
+}
+
 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
   adb_datum_t *datum;
   adb_datum_t d = {0};
@@ -466,7 +518,7 @@
         delete qstate->exact_evaluation_queue;
         return 1;
       }
-      if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
+      if(audiodb_datum_qpointers_partial(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers, qstate)) {
         delete qstate->exact_evaluation_queue;
         audiodb_free_datum(&d);
         return 1;