Mercurial > hg > audiodb
changeset 546:e8193805ce42 multiprobeLSH
Rework new audiodb_track_id_datum_offset() and audiodb_insert_create_datum_offset() so that all offsets are in num_vectors. Internals of data storage are left to the methods to implement. If we are going to export these methods to the API, how do we expose the cache? It needs to be passed back to the user and back in by the user on each call for caching to be re-entrant. No (intentional) functional changes in this version.
author | mas01mc |
---|---|
date | Sun, 08 Feb 2009 22:32:33 +0000 |
parents | bf89c80ec4cc |
children | ecfba4208621 |
files | audioDB-internals.h insert.cpp query.cpp |
diffstat | 3 files changed, 56 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB-internals.h Sun Feb 08 15:53:57 2009 +0000 +++ b/audioDB-internals.h Sun Feb 08 22:32:33 2009 +0000 @@ -302,8 +302,8 @@ } int audiodb_read_data(adb_t *, int, int, double **, size_t *); -int audiodb_insert_create_datum_offset(adb_insert_t *, adb_datum_t *, off_t data_offset, size_t data_size, adb_fd_cache_t * cache=NULL); -int audiodb_track_id_datum_offset(adb_t *, uint32_t , adb_datum_t *, off_t vector_offset, size_t vector_size, adb_fd_cache_t * cache=NULL); +int audiodb_insert_create_datum_offset(adb_insert_t *, adb_datum_t *, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache=NULL); +int audiodb_track_id_datum_offset(adb_t *, uint32_t , adb_datum_t *, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache=NULL); int audiodb_insert_create_datum(adb_insert_t * insert, adb_datum_t *datum); int audiodb_track_id_datum(adb_t * adb, uint32_t track_id, adb_datum_t *datum); int audiodb_free_datum(adb_datum_t *);
--- a/insert.cpp Sun Feb 08 15:53:57 2009 +0000 +++ b/insert.cpp Sun Feb 08 22:32:33 2009 +0000 @@ -337,7 +337,7 @@ return audiodb_insert_create_datum_offset(insert, datum, 0, 0, 0); } -int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t data_offset, size_t data_size, adb_fd_cache_t *cache) { +int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache) { int fd = 0; FILE *file = NULL; struct stat st; @@ -351,8 +351,9 @@ } // STEP 1 check if we need to clear the cache - if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0)) + if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0)){ clear_cache = true; + } // STEP 2. Clear the cache if necessary if(cache && clear_cache){ @@ -388,10 +389,12 @@ } // STEP 5. Allocate data memory if necessary, read the requested amount of data - if(data_size) - size = data_size; - else + if(num_vectors){ + size = num_vectors*datum->dim*sizeof(double); + } + else{ size = st.st_size - sizeof(uint32_t); + } datum->nvectors = size / (sizeof(double) * datum->dim); @@ -403,13 +406,15 @@ goto error; } - if(data_offset) - lseek(fd, sizeof(uint32_t) + data_offset, SEEK_SET); + if(vector_offset){ + lseek(fd, sizeof(uint32_t) + vector_offset*datum->dim*sizeof(double), SEEK_SET); + } read_or_goto_error(fd, datum->data, size); // STEP 6. Close the file descriptor, unless we are caching it - if(!cache) + if(!cache){ close(fd); + } fd = 0; // we're done with the data if(insert->power) { @@ -422,8 +427,9 @@ } // Use the cached file descriptor or open a new file descriptor - if (cache && cache->power_fd) + if (cache && cache->power_fd){ fd = cache->power_fd; + } else if((fd = open(insert->power, O_RDONLY)) == -1) { goto error; } @@ -459,7 +465,7 @@ * I hate C. 
*/ - if( (!data_size) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) { + if( (!num_vectors) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) { goto error; } @@ -469,8 +475,9 @@ if(dim != 1) { goto error; } - if(cache) + if(cache){ cache->power_fd = fd; + } } // Allocate data memory if necessary, read the requested amount of data @@ -480,13 +487,15 @@ goto error; } - if(data_offset) - lseek(fd, sizeof(uint32_t) + data_offset/datum->dim, SEEK_SET); + if(vector_offset){ + lseek(fd, sizeof(uint32_t) + vector_offset*sizeof(double), SEEK_SET); + } read_or_goto_error(fd, datum->power, size / datum->dim); - if(!cache) + if(!cache){ close(fd); + } fd = 0; } @@ -500,31 +509,38 @@ } // Use the cached file descriptor or open a new file descriptor and maybe cache - if (cache && cache->times_file) + if (cache && cache->times_file){ file = cache->times_file; + } else{ if(!(file = fopen(insert->times, "r"))) { goto error; } - if(cache) + if(cache){ cache->times_file = file; + } } // Allocate data memory if necessary, read the requested amount of data - if(!datum->times) + if(!datum->times){ datum->times = (double *) malloc(2 * size / datum->dim); + } if(!datum->times) { goto error; } rewind(file); + if(fscanf(file, " %lf", &t) != 1) { goto error; } - if(data_offset) - while(data_offset-- != 1 ) - if(fscanf(file, " %lf", &t) != 1) + if(vector_offset){ + while(vector_offset-- != 1 ){ + if(fscanf(file, " %lf", &t) != 1){ goto error; + } + } + } tp = datum->times; *tp++ = t; for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
--- a/query.cpp Sun Feb 08 15:53:57 2009 +0000 +++ b/query.cpp Sun Feb 08 22:32:33 2009 +0000 @@ -203,31 +203,35 @@ } int audiodb_track_id_datum_offset(adb_t *adb, uint32_t track_id, adb_datum_t *d, off_t vector_offset, size_t num_vectors, adb_fd_cache_t* cache){ - off_t track_offset = (*adb->track_offsets)[track_id]; - if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) { /* create a reference/insert, then use adb_insert_create_datum() */ adb_reference_t *reference = NULL; if(! (cache && cache->reference) ){ reference = (adb_reference_t *) malloc(sizeof(adb_reference_t)); reference->features = (char*) malloc(ADB_MAXSTR*sizeof(char)); - if(adb->header->flags & ADB_HEADER_FLAG_POWER) + if(adb->header->flags & ADB_HEADER_FLAG_POWER) { reference->power = (char*) malloc(ADB_MAXSTR*sizeof(char)); - else + } + else{ reference->power = NULL; - if(adb->header->flags & ADB_HEADER_FLAG_TIMES) + } + if(adb->header->flags & ADB_HEADER_FLAG_TIMES){ reference->times = (char*)malloc(ADB_MAXSTR*sizeof(char)); - else + } + else{ reference->times = NULL; - if(cache) + } + if(cache){ cache->reference = reference; + } } - else + else{ reference = cache->reference; - + } if(! 
(cache && cache->track_id==track_id) ){ - if(cache) + if(cache){ cache->track_id = track_id; + } lseek(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET); read_or_goto_error(adb->fd, (void *)reference->features, ADB_MAXSTR); if(adb->header->flags & ADB_HEADER_FLAG_POWER) { @@ -239,14 +243,15 @@ read_or_goto_error(adb->fd, (void *)reference->times, ADB_MAXSTR); } } - - int retval = audiodb_insert_create_datum_offset(reference, d, vector_offset*adb->header->dim*sizeof(double), num_vectors*adb->header->dim*sizeof(double), cache); + int retval = audiodb_insert_create_datum_offset(reference, d, vector_offset, num_vectors, cache); if(!cache){ audiodb_free_datum_reference(reference); free(reference); } return retval; - } else { + } + else { + off_t track_offset = (*adb->track_offsets)[track_id]; /* initialize from sources of data that we already have */ if(num_vectors) d->nvectors = num_vectors;