Mercurial > hg > audiodb
changeset 532:06409b6e268f multiprobeLSH
DELETED LSH_N_POINT_BITS coding. Tracks and points are now identified with a binary search, std::lower_bound(), on *adb->track_offsets, applied to the point IDs returned from LSH retrieval.
author | mas01mc |
---|---|
date | Mon, 02 Feb 2009 17:41:02 +0000 |
parents | ddf763553175 |
children | eb5dd50dd7d1 |
files | Makefile audioDB-internals.h audioDB.h create.cpp index.cpp query-indexed.cpp |
diffstat | 6 files changed, 29 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Sat Jan 31 16:25:30 2009 +0000 +++ b/Makefile Mon Feb 02 17:41:02 2009 +0000 @@ -17,7 +17,7 @@ MINORVERSION=0 LIBRARY=lib$(EXECUTABLE).so.$(SOVERSION).$(MINORVERSION) -override CFLAGS+=-O3 -fPIC +override CFLAGS+=-O3 -fPIC #-fprofile-arcs -ftest-coverage -pg # set to DUMP hashtables on QUERY load #override CFLAGS+=-DLSH_DUMP_CORE_TABLES
--- a/audioDB-internals.h Sat Jan 31 16:25:30 2009 +0000 +++ b/audioDB-internals.h Mon Feb 02 17:41:02 2009 +0000 @@ -271,26 +271,25 @@ return (*adb->keys)[index].c_str(); } -static inline uint32_t audiodb_index_to_track_id(uint32_t lshid, uint32_t n_point_bits) { - return (lshid >> n_point_bits); +static inline uint32_t audiodb_index_to_track_id(adb_t *adb, uint32_t lshid){ + std::vector<off_t>::iterator it_b = (*adb->track_offsets).begin(); + std::vector<off_t>::iterator it_e = (*adb->track_offsets).end(); + off_t test_id = lshid*adb->header->dim*sizeof(double); + std::vector<off_t>::iterator point_p = std::lower_bound(it_b, it_e, test_id); + if(*point_p == test_id) + return point_p - it_b; // lshid is first point in found track + else + return point_p - it_b - 1; // lshid is a point in the previous track } -static inline uint32_t audiodb_index_to_track_pos(uint32_t lshid, uint32_t n_point_bits) { - return (lshid & ((1 << n_point_bits) - 1)); +static inline uint32_t audiodb_index_to_track_pos(adb_t *adb, uint32_t track_id, uint32_t lshid) { + uint32_t trackIndexOffset = (*adb->track_offsets)[track_id] / (adb->header->dim * sizeof(double)); + return lshid - trackIndexOffset; } -static inline uint32_t audiodb_index_from_trackinfo(uint32_t track_id, uint32_t track_pos, uint32_t n_point_bits) { - return ((track_id << n_point_bits) | track_pos); -} - -#define ADB_FIXME_DEFAULT_LSH_N_POINT_BITS 15 -#ifndef ADB_FIXME_LSH_N_POINT_BITS -#define ADB_FIXME_LSH_N_POINT_BITS ADB_FIXME_DEFAULT_LSH_N_POINT_BITS -#endif - -static inline uint32_t audiodb_lsh_n_point_bits(adb_t *adb) { - uint32_t nbits = adb->header->flags >> 28; - return (nbits ? 
nbits : ADB_FIXME_LSH_N_POINT_BITS); +static inline uint32_t audiodb_index_from_trackinfo(adb_t *adb, uint32_t track_id, uint32_t track_pos) { + uint32_t trackIndexOffset = (*adb->track_offsets)[track_id] / (adb->header->dim * sizeof(double)); + return trackIndexOffset + track_pos; } int audiodb_read_data(adb_t *, int, int, double **, size_t *); @@ -327,8 +326,6 @@ #define ADB_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24) #define ADB_FORMAT_VERSION (4U) -#define ADB_LSH_MAXTRACKLEN (1 << ADB_FIXME_LSH_N_POINT_BITS) - #define align_up(x,w) (((x) + ((1<<w)-1)) & ~((1<<w)-1)) #define align_down(x,w) ((x) & ~((1<<w)-1))
--- a/audioDB.h Sat Jan 31 16:25:30 2009 +0000 +++ b/audioDB.h Mon Feb 02 17:41:02 2009 +0000 @@ -92,7 +92,7 @@ #define O2_MAXNN (1000000U) #define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence #define O2_MAXTRACKS (1000000U) // maximum number of tracks -#define O2_MAXTRACKLEN ADB_LSH_MAXTRACKLEN + #define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB #define O2_SERIAL_MAX_TRACKBATCH (1000000) #define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes)
--- a/create.cpp Sat Jan 31 16:25:30 2009 +0000 +++ b/create.cpp Mon Feb 02 17:41:02 2009 +0000 @@ -73,15 +73,6 @@ databytes = ((off_t) datasize) * 1024 * 1024; auxbytes = databytes / datadim; - // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header - // If this value is 0 then it will be set to 14 - -#if ADB_FIXME_LSH_N_POINT_BITS > 15 -#error "consistency check of ADB_FIXME_LSH_N_POINT_BITS failed (>31)" -#endif - - header->flags |= ADB_FIXME_LSH_N_POINT_BITS << 28; - // If database will fit in a single file the vectors are copied into the AudioDB instance // Else all the vectors are left on the FileSystem and we use the dataOffset as storage // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
--- a/index.cpp Sat Jan 31 16:25:30 2009 +0000 +++ b/index.cpp Mon Feb 02 17:41:02 2009 +0000 @@ -128,7 +128,7 @@ // Get the lsh header info and find how many tracks are inserted already lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here assert(lsh); - Uns32T maxs = audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1; + Uns32T maxs = audiodb_index_to_track_id(adb, lsh->get_maxp())+1; delete lsh; lsh = 0; @@ -295,19 +295,13 @@ int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){ // Loop over the current input track's vectors Uns32T numVecs = 0; - if (trackTable[trackID] > O2_MAXTRACKLEN) { - if (O2_MAXTRACKLEN < sequenceLength - 1) { - numVecs = 0; - } else { - numVecs = O2_MAXTRACKLEN - sequenceLength + 1; - } + + if (trackTable[trackID] < sequenceLength - 1) { + numVecs = 0; } else { - if (trackTable[trackID] < sequenceLength - 1) { - numVecs = 0; - } else { - numVecs = trackTable[trackID] - sequenceLength + 1; - } + numVecs = trackTable[trackID] - sequenceLength + 1; } + Uns32T numVecsAboveThreshold = 0, collisionCount = 0; if(numVecs){ @@ -351,7 +345,7 @@ cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE; for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){ if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold))) - collisionCount += lsh->insert_point((*vv)[pointID], audiodb_index_from_trackinfo(trackID, pointID, audiodb_lsh_n_point_bits(adb))); + collisionCount += lsh->insert_point((*vv)[pointID], audiodb_index_from_trackinfo(adb, trackID, pointID)); spp+=sequenceHop; } return collisionCount;
--- a/query-indexed.cpp Sat Jan 31 16:25:30 2009 +0000 +++ b/query-indexed.cpp Mon Feb 02 17:41:02 2009 +0000 @@ -44,9 +44,8 @@ adb_qcallback_t *data = (adb_qcallback_t *) user_data; adb_t *adb = data->adb; adb_qstate_internal_t *qstate = data->qstate; - uint32_t nbits = audiodb_lsh_n_point_bits(adb); - uint32_t trackID = audiodb_index_to_track_id(pointID, nbits); - uint32_t spos = audiodb_index_to_track_pos(pointID, nbits); + uint32_t trackID = audiodb_index_to_track_id(adb, pointID); + uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID); std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { adb_result_t r; @@ -64,9 +63,8 @@ adb_qcallback_t *data = (adb_qcallback_t *) user_data; adb_t *adb = data->adb; adb_qstate_internal_t *qstate = data->qstate; - uint32_t nbits = audiodb_lsh_n_point_bits(adb); - uint32_t trackID = audiodb_index_to_track_id(pointID, nbits); - uint32_t spos = audiodb_index_to_track_pos(pointID, nbits); + uint32_t trackID = audiodb_index_to_track_id(adb, pointID); + uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID); std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { PointPair p(trackID, qpos, spos); @@ -124,7 +122,7 @@ return -1; } - uint32_t Nq = (qpointers.nvectors > ADB_LSH_MAXTRACKLEN ? ADB_LSH_MAXTRACKLEN : qpointers.nvectors) - sequence_length + 1; + uint32_t Nq = qpointers.nvectors - sequence_length + 1; std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length); // Construct shingles from query features