diff query.cpp @ 541:52d82badc544 multiprobeLSH

Added file caching for sparse datum reads. This required adding a new type, adb_fd_cache_t, and modifying the read functions audiodb_track_id_datum() and audiodb_insert_create_datum() to use the cache struct when the caller provides one.
author mas01mc
date Sat, 07 Feb 2009 16:59:31 +0000
parents 06ed85832c3b
children 6afeb2c76957
line wrap: on
line diff
--- a/query.cpp	Sat Feb 07 12:12:46 2009 +0000
+++ b/query.cpp	Sat Feb 07 16:59:31 2009 +0000
@@ -198,26 +198,46 @@
   return 1;
 }
 
-int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d, off_t vector_offset=0, size_t num_vectors=0) {
+int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d, off_t vector_offset=0, size_t num_vectors=0, adb_fd_cache_t* cache=0){
   off_t track_offset = (*adb->track_offsets)[track_id];
+  
   if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
     /* create a reference/insert, then use adb_insert_create_datum() */
-    adb_reference_t reference = {0};
-    char features[ADB_MAXSTR], power[ADB_MAXSTR], times[ADB_MAXSTR];
-    lseek(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
-    read_or_goto_error(adb->fd, features, ADB_MAXSTR);
-    reference.features = features;
-    if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
-      lseek(adb->fd, adb->header->powerTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
-      read_or_goto_error(adb->fd, power, ADB_MAXSTR);
-      reference.power = power;
+    adb_reference_t *reference = NULL;
+    if(! (cache && cache->reference) ){
+      reference = (adb_reference_t *) malloc(sizeof(adb_reference_t));
+      reference->features = (char*) malloc(ADB_MAXSTR*sizeof(char));
+      if(adb->header->flags & ADB_HEADER_FLAG_POWER) 
+	reference->power = (char*) malloc(ADB_MAXSTR*sizeof(char));
+      if(adb->header->flags & ADB_HEADER_FLAG_TIMES) 
+	reference->times = (char*)malloc(ADB_MAXSTR*sizeof(char));
+      if(cache)
+	cache->reference = reference;
     }
-    if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
-      lseek(adb->fd, adb->header->timesTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
-      read_or_goto_error(adb->fd, times, ADB_MAXSTR);
-      reference.times = times;
+    else
+      reference = cache->reference;
+
+    if(! (cache && cache->track_id==track_id) ){
+      if(cache)
+	cache->track_id = track_id;
+      lseek(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
+      read_or_goto_error(adb->fd, (void *)reference->features, ADB_MAXSTR);
+      if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
+	lseek(adb->fd, adb->header->powerTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
+	read_or_goto_error(adb->fd, (void *)reference->power, ADB_MAXSTR);
+      }
+      if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
+	lseek(adb->fd, adb->header->timesTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
+	read_or_goto_error(adb->fd, (void *)reference->times, ADB_MAXSTR);
+      }
     }
-    return audiodb_insert_create_datum(&reference, d, vector_offset*adb->header->dim*sizeof(double), num_vectors*adb->header->dim*sizeof(double));
+
+    int retval = audiodb_insert_create_datum(reference, d, vector_offset*adb->header->dim*sizeof(double), num_vectors*adb->header->dim*sizeof(double), cache);
+    if(!cache){
+      audiodb_free_datum_reference(reference);
+      free(reference);      
+    }
+    return retval;
   } else {
     /* initialize from sources of data that we already have */
     if(num_vectors)
@@ -485,17 +505,26 @@
   double dist;
   double *dbdata = 0, *dbdata_pointer;
   Uns32T npairs = qstate->exact_evaluation_queue->size();
+  Uns32T currentTrack = qstate->exact_evaluation_queue->top().trackID+1; // i.e. not first track
 #ifdef _LSH_DEBUG_
   cout << "Num vector pairs to evaluate: " << npairs << "..." << endl;
   cout.flush();
 #endif  
   adb_datum_t d = {0};
+  adb_fd_cache_t c = {0};
+  c.track_id = currentTrack;
   while(npairs--) {
     PointPair pp = qstate->exact_evaluation_queue->top();
+    if(pp.trackID != currentTrack){
+          audiodb_free_datum(&d);
+	  currentTrack = pp.trackID;
+    }
     maybe_delete_array(dbpointers.mean_duration);
-    if(audiodb_track_id_datum(adb, pp.trackID, &d, pp.spos, sequence_length)) {
+    if(audiodb_track_id_datum(adb, pp.trackID, &d, pp.spos, sequence_length, &c)) {
       delete qstate->exact_evaluation_queue;
       delete qstate->set;
+      audiodb_free_datum(&d);
+      audiodb_free_datum_cache(&c);
       return 1;
     }
       
@@ -503,6 +532,7 @@
       delete qstate->exact_evaluation_queue;
       delete qstate->set;
       audiodb_free_datum(&d);
+      audiodb_free_datum_cache(&c);
       return 1;
     }
       
@@ -535,13 +565,11 @@
       }
     }
     qstate->exact_evaluation_queue->pop();
-    audiodb_free_datum(&d);
   }
 
   // Cleanup
-  //  maybe_delete_array(dbdata);
-  //maybe_delete_array(dbpointers.l2norm_data);
-  //maybe_delete_array(dbpointers.power_data);
+  audiodb_free_datum(&d);
+  audiodb_free_datum_cache(&c);
   maybe_delete_array(dbpointers.mean_duration);
   delete qstate->exact_evaluation_queue;
   delete qstate->set;