changeset 546:e8193805ce42 multiprobeLSH

Rework the new audiodb_track_id_datum_offset() and audiodb_insert_create_datum_offset() so that all offsets and sizes are expressed in numbers of vectors (vector_offset, num_vectors) rather than bytes. The internals of data storage are left to the methods to implement. If we are going to export these methods to the API, how do we expose the cache? It needs to be passed back to the user, and passed back in by the user on each call, for caching to be re-entrant. No (intentional) functional changes in this version.
author mas01mc
date Sun, 08 Feb 2009 22:32:33 +0000
parents bf89c80ec4cc
children ecfba4208621
files audioDB-internals.h insert.cpp query.cpp
diffstat 3 files changed, 56 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB-internals.h	Sun Feb 08 15:53:57 2009 +0000
+++ b/audioDB-internals.h	Sun Feb 08 22:32:33 2009 +0000
@@ -302,8 +302,8 @@
 }
 
 int audiodb_read_data(adb_t *, int, int, double **, size_t *);
-int audiodb_insert_create_datum_offset(adb_insert_t *, adb_datum_t *, off_t data_offset, size_t data_size, adb_fd_cache_t * cache=NULL);
-int audiodb_track_id_datum_offset(adb_t *, uint32_t , adb_datum_t *, off_t vector_offset, size_t vector_size, adb_fd_cache_t * cache=NULL);
+int audiodb_insert_create_datum_offset(adb_insert_t *, adb_datum_t *, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache=NULL);
+int audiodb_track_id_datum_offset(adb_t *, uint32_t , adb_datum_t *, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache=NULL);
 int audiodb_insert_create_datum(adb_insert_t * insert, adb_datum_t *datum);
 int audiodb_track_id_datum(adb_t * adb, uint32_t track_id, adb_datum_t *datum);
 int audiodb_free_datum(adb_datum_t *);
--- a/insert.cpp	Sun Feb 08 15:53:57 2009 +0000
+++ b/insert.cpp	Sun Feb 08 22:32:33 2009 +0000
@@ -337,7 +337,7 @@
   return audiodb_insert_create_datum_offset(insert, datum, 0, 0, 0);
 }
 
-int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t data_offset, size_t data_size, adb_fd_cache_t *cache) {
+int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache) {
   int fd = 0;
   FILE *file = NULL;
   struct stat st;
@@ -351,8 +351,9 @@
   }
 
   // STEP 1 check if we need to clear the cache
-  if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0))
+  if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0)){
     clear_cache = true;
+  }
 
   // STEP 2. Clear the cache if necessary
   if(cache && clear_cache){
@@ -388,10 +389,12 @@
   }
 
   // STEP 5. Allocate data memory if necessary, read the requested amount of data
-  if(data_size)
-    size = data_size;
-  else
+  if(num_vectors){
+    size = num_vectors*datum->dim*sizeof(double);
+  }
+  else{
     size = st.st_size - sizeof(uint32_t);
+  }
 
   datum->nvectors = size / (sizeof(double) * datum->dim);
 
@@ -403,13 +406,15 @@
     goto error;
   }
   
-  if(data_offset)
-    lseek(fd, sizeof(uint32_t) + data_offset, SEEK_SET);
+  if(vector_offset){
+    lseek(fd, sizeof(uint32_t) + vector_offset*datum->dim*sizeof(double), SEEK_SET);
+  }
   read_or_goto_error(fd, datum->data, size);
 
   // STEP 6. Close the file descriptor, unless we are caching it
-  if(!cache)
+  if(!cache){
     close(fd);
+  }
   fd = 0; // we're done with the data
 
   if(insert->power) {
@@ -422,8 +427,9 @@
     }
 
     // Use the cached file descriptor or open a new file descriptor
-    if (cache && cache->power_fd)
+    if (cache && cache->power_fd){
       fd = cache->power_fd;
+    }
     else if((fd = open(insert->power, O_RDONLY)) == -1) {
       goto error;
     }
@@ -459,7 +465,7 @@
      * I hate C.
      */
 
-    if( (!data_size) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
+    if( (!num_vectors) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
       goto error;
     }
 
@@ -469,8 +475,9 @@
       if(dim != 1) {
 	goto error;
       }
-      if(cache)
+      if(cache){
 	cache->power_fd = fd;
+      }
     }
 
     // Allocate data memory if necessary, read the requested amount of data
@@ -480,13 +487,15 @@
       goto error;
     }
 
-    if(data_offset)
-      lseek(fd, sizeof(uint32_t) + data_offset/datum->dim, SEEK_SET);
+    if(vector_offset){
+      lseek(fd, sizeof(uint32_t) + vector_offset*sizeof(double), SEEK_SET);
+    }
 
     read_or_goto_error(fd, datum->power, size / datum->dim);
 
-    if(!cache)
+    if(!cache){
       close(fd);
+    }
     fd = 0;
   }
 
@@ -500,31 +509,38 @@
     }
 
     // Use the cached file descriptor or open a new file descriptor and maybe cache
-    if (cache && cache->times_file)
+    if (cache && cache->times_file){
       file = cache->times_file;
+    }
     else{
       if(!(file = fopen(insert->times, "r"))) {
 	goto error;
       }
-      if(cache)
+      if(cache){
         cache->times_file = file;
+      }
     }
     
     // Allocate data memory if necessary, read the requested amount of data
-    if(!datum->times)
+    if(!datum->times){
       datum->times = (double *) malloc(2 * size / datum->dim);
+    }
     if(!datum->times) {
       goto error;
     }
 
     rewind(file);
+
     if(fscanf(file, " %lf", &t) != 1) {
       goto error;
     }
-    if(data_offset)
-      while(data_offset-- != 1 )
-	if(fscanf(file, " %lf", &t) != 1)
+    if(vector_offset){
+      while(vector_offset-- != 1 ){
+	if(fscanf(file, " %lf", &t) != 1){ 
 	  goto error;
+	}
+      }
+    }
     tp = datum->times;
     *tp++ = t;
     for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
--- a/query.cpp	Sun Feb 08 15:53:57 2009 +0000
+++ b/query.cpp	Sun Feb 08 22:32:33 2009 +0000
@@ -203,31 +203,35 @@
 }
 
 int audiodb_track_id_datum_offset(adb_t *adb, uint32_t track_id, adb_datum_t *d, off_t vector_offset, size_t num_vectors, adb_fd_cache_t* cache){
-  off_t track_offset = (*adb->track_offsets)[track_id];
-  
   if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
     /* create a reference/insert, then use adb_insert_create_datum() */
     adb_reference_t *reference = NULL;
     if(! (cache && cache->reference) ){
       reference = (adb_reference_t *) malloc(sizeof(adb_reference_t));
       reference->features = (char*) malloc(ADB_MAXSTR*sizeof(char));
-      if(adb->header->flags & ADB_HEADER_FLAG_POWER) 
+      if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
 	reference->power = (char*) malloc(ADB_MAXSTR*sizeof(char));
-      else
+      }
+      else{
 	reference->power = NULL;
-      if(adb->header->flags & ADB_HEADER_FLAG_TIMES) 
+      }
+      if(adb->header->flags & ADB_HEADER_FLAG_TIMES){
 	reference->times = (char*)malloc(ADB_MAXSTR*sizeof(char));
-      else
+      }
+      else{
 	reference->times = NULL;
-      if(cache)
+      }
+      if(cache){
 	cache->reference = reference;
+      }
     }
-    else
+    else{
       reference = cache->reference;
-
+    }
     if(! (cache && cache->track_id==track_id) ){
-      if(cache)
+      if(cache){
 	cache->track_id = track_id;
+      }
       lseek(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
       read_or_goto_error(adb->fd, (void *)reference->features, ADB_MAXSTR);
       if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
@@ -239,14 +243,15 @@
 	read_or_goto_error(adb->fd, (void *)reference->times, ADB_MAXSTR);
       }
     }
-
-    int retval = audiodb_insert_create_datum_offset(reference, d, vector_offset*adb->header->dim*sizeof(double), num_vectors*adb->header->dim*sizeof(double), cache);
+    int retval = audiodb_insert_create_datum_offset(reference, d, vector_offset, num_vectors, cache);
     if(!cache){
       audiodb_free_datum_reference(reference);
       free(reference);      
     }
     return retval;
-  } else {
+  } 
+  else {
+    off_t track_offset = (*adb->track_offsets)[track_id];
     /* initialize from sources of data that we already have */
     if(num_vectors)
       d->nvectors = num_vectors;