Mercurial > hg > audiodb
changeset 465:1030664df98c api-inversion
No more audioDB::index_allocate and audioDB::index_init_query
No more SERVER_LSH_INDEX_SINGLETON, either; instead each adb_t contains
a single cache of the last used in-core index. At the moment, this
cache is unused by the server (and the previous cache code has been
replaced by a comment), but I think that this way everyone can be
allowed to benefit without anyone having to explicitly manage indexes
themselves.
I'm not going to say how long I wandered in a maze of valgrind before
giving up and keeping the hacky workaround for loading the lsh tables
[see the FIXME comment in audiodb_index_init_query()]; let's just say
that it was long enough to find the extra bonus crashy close(lshfid) in
audioDB::index_index_db.
Also, delete the abstraction-inverting LSH stuff from query.cpp where we
are making our reporters; the proper fix for the case that code handled
(presumably creating small indexes for large datasets) is to implement
space-efficient reporters. (The accumulator code, which is my second
attempt, is more space-efficient than the reporters; inspiration may
wish to be drawn...)
author | mas01cr |
---|---|
date | Tue, 30 Dec 2008 23:56:57 +0000 |
parents | 35bb388d0eac |
children | 11fccb6a3bd5 |
files | audioDB-internals.h audioDB.cpp audioDB.h close.cpp index.cpp open.cpp query.cpp soap.cpp |
diffstat | 8 files changed, 53 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB-internals.h Tue Dec 30 15:38:59 2008 +0000 +++ b/audioDB-internals.h Tue Dec 30 23:56:57 2008 +0000 @@ -52,6 +52,7 @@ std::map<std::string,uint32_t> *keymap; std::vector<uint32_t> *track_lengths; std::vector<off_t> *track_offsets; + LSH *cached_lsh; }; typedef struct {
--- a/audioDB.cpp Tue Dec 30 15:38:59 2008 +0000 +++ b/audioDB.cpp Tue Dec 30 23:56:57 2008 +0000 @@ -4,7 +4,6 @@ #include "audioDB-internals.h" } -LSH* SERVER_LSH_INDEX_SINGLETON; char* SERVER_ADB_ROOT; char* SERVER_ADB_FEATURE_ROOT; @@ -225,7 +224,7 @@ audiodb_close(adb); adb = NULL; } - if(lsh!=SERVER_LSH_INDEX_SINGLETON) + if(lsh) delete lsh; } @@ -826,7 +825,6 @@ // This entry point is visited once per instance // so it is a good place to set any global state variables int main(const int argc, const char* argv[]){ - SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables SERVER_ADB_ROOT = 0; // Server-side database root prefix SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix audioDB(argc, argv);
--- a/audioDB.h Tue Dec 30 15:38:59 2008 +0000 +++ b/audioDB.h Tue Dec 30 23:56:57 2008 +0000 @@ -47,6 +47,7 @@ Accumulator *accumulator; std::set<std::string> *allowed_keys; std::priority_queue<PointPair> *exact_evaluation_queue; + LSH *lsh; } adb_qstate_internal_t; #define MAXSTR 512 @@ -198,7 +199,6 @@ #define SAFE_DELETE(PTR) delete PTR; PTR=0; #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0; -extern LSH* SERVER_LSH_INDEX_SINGLETON; extern char* SERVER_ADB_ROOT; extern char* SERVER_ADB_FEATURE_ROOT; @@ -365,9 +365,6 @@ int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp); Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp); int index_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate); - int index_init_query(const char* dbName); - int index_exists(const char* dbName, double radius, Uns32T sequenceLength); - LSH* index_allocate(char* indexName, bool load_hashTables); void insertPowerData(unsigned n, int powerfd, double *powerdata); void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
--- a/close.cpp Tue Dec 30 15:38:59 2008 +0000 +++ b/close.cpp Tue Dec 30 23:56:57 2008 +0000 @@ -11,6 +11,9 @@ delete adb->keymap; delete adb->track_lengths; delete adb->track_offsets; + if(adb->cached_lsh) { + delete adb->cached_lsh; + } close(adb->fd); free(adb); }
--- a/index.cpp Tue Dec 30 15:38:59 2008 +0000 +++ b/index.cpp Tue Dec 30 23:56:57 2008 +0000 @@ -62,21 +62,23 @@ } } -// If we are a server and have a memory-resident index, check the indexName against the resident index (using get_indexName()) -// If they match, i.e. path+dbName_resident == path+dbName_requested, use -// the memory-resident index. -// Else allocate a new LSH instance and load the index from disk -LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){ - LSH* gIndx=SERVER_LSH_INDEX_SINGLETON; - if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) ) - audioDB::lsh = gIndx; // Use the global SERVER resident index - else{ - if(audioDB::lsh) - delete audioDB::lsh; - audioDB::lsh = new LSH(indexName, load_hashTables); +/* FIXME: the indexName arg should be "const char *", but the LSH + * library doesn't like that. + */ +LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) { + LSH *lsh; + if(adb->cached_lsh) { + if(!strncmp(adb->cached_lsh->get_indexName(), indexName, MAXSTR)) { + return adb->cached_lsh; + } else { + delete adb->cached_lsh; + } } - assert(audioDB::lsh); - return audioDB::lsh; + lsh = new LSH(indexName, load_tables); + if(load_tables) { + adb->cached_lsh = lsh; + } + return lsh; } vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) { @@ -268,6 +270,7 @@ // Clean up delete lsh; lsh = 0; + } else { close(lshfid); } @@ -513,48 +516,27 @@ // return true if indexed query performed else return false -int audioDB::index_init_query(const char* dbName){ +int audiodb_index_init_query(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) { - if(!(audiodb_index_exists(dbName, radius, sequenceLength))) + uint32_t sequence_length = spec->qid.sequence_length; + double radius = spec->refine.radius; + if(!(audiodb_index_exists(adb->path, radius, sequence_length))) return false; - char *indexName = 
audiodb_index_get_name(dbName, radius, sequenceLength); + char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length); if(!indexName) { - error("failed to get index name", dbName); + return false; } - // Test to see if file exists - if((lshfid = open (indexName, O_RDONLY)) < 0){ - delete[] indexName; - return false; + qstate->lsh = audiodb_index_allocate(adb, indexName, corep); + + /* FIXME: it would be nice if the LSH library didn't make me do + * this. */ + if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) { + delete qstate->lsh; + qstate->lsh = audiodb_index_allocate(adb, indexName, true); } - lsh = index_allocate(indexName, false); // Get the header only here - sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim - - if(lsh!=SERVER_LSH_INDEX_SINGLETON){ - if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE)) - printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius()); - VERB_LOG(1,"INDEX: dim %d\n", (int)dbH->dim); - VERB_LOG(1,"INDEX: R %f\n", lsh->get_radius()); - VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength); - VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); - VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); - VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); - VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); - VERB_LOG(1,"INDEX: s %d\n", audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))); - VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName); - } - - // Check to see if we are loading hash tables into core, and do so if true - if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ - if(SERVER_LSH_INDEX_SINGLETON) - fprintf(stderr,"INDEX: using persistent hash tables: %s\n", lsh->get_indexName()); - else - VERB_LOG(1,"INDEX: loading hash tables into core %s\n", 
(lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1"); - lsh = index_allocate(indexName, true); - } - delete[] indexName; return true; } @@ -617,8 +599,9 @@ add_point_func = &audiodb_index_add_point_approximate; } - if(!index_init_query(adb->path)) // sets-up LSH index structures for querying + if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) { return 0; + } char *database = audiodb_index_get_name(adb->path, radius, sequenceLength); if(!database) { @@ -649,18 +632,18 @@ // numVecsAboveThreshold is number of points with power >= absolute_threshold double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation if(usingQueryPoint && numVecsAboveThreshold){ - if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) - lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, &callback_data); + if((qstate->lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) + qstate->lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, &callback_data); else - lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, &callback_data); + qstate->lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, &callback_data); } else if(numVecsAboveThreshold) for( Uns32T pointID = 0 ; pointID < Nq; pointID++ ) if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) { - if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) { - lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); + if((qstate->lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) { + qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); } else { - lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); + qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); } } audiodb_index_delete_shingles(vv); @@ -669,9 
+652,6 @@ // Perform exact distance computation on point pairs in exact_evaluation_queue audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers); - // Close the index file - close(lshfid); - // Clean up if(query_data) delete[] query_data; @@ -683,6 +663,8 @@ delete[] qpointers.mean_duration; if(database) delete[] database; + if(qstate->lsh != adb->cached_lsh) + delete qstate->lsh; return Nq; }
--- a/open.cpp Tue Dec 30 15:38:59 2008 +0000 +++ b/open.cpp Tue Dec 30 23:56:57 2008 +0000 @@ -128,6 +128,7 @@ if(audiodb_collect_track_lengths(adb)) { goto error; } + adb->cached_lsh = 0; return adb; error:
--- a/query.cpp Tue Dec 30 15:38:59 2008 +0000 +++ b/query.cpp Tue Dec 30 23:56:57 2008 +0000 @@ -146,11 +146,6 @@ case O2_SEQUENCE_QUERY: if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) { reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); - } else if (audiodb_index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)) { - char *indexName = audiodb_index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadReporter(trackNN, audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1); - delete[] indexName; } else { reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); } @@ -158,11 +153,6 @@ case O2_N_SEQUENCE_QUERY: if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) { reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); - } else if (audiodb_index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){ - char *indexName = audiodb_index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadNNReporter(pointNN, trackNN, audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1); - delete[] indexName; } else { reporter = new trackSequenceQueryRadNNReporter(pointNN, trackNN, dbH->numFiles); }
--- a/soap.cpp Tue Dec 30 15:38:59 2008 +0000 +++ b/soap.cpp Tue Dec 30 23:56:57 2008 +0000 @@ -446,22 +446,12 @@ else { fprintf(stderr, "Socket connection successful: master socket = %d\n", m); - // Make a global Web Services LSH Index (SINGLETON) - if(WS_load_index && dbName && !audiodb_index_exists(dbName, radius, sequenceLength)){ - /* FIXME: this leaks the indexName */ - error("Can't find requested index file:", audiodb_index_get_name(dbName,radius,sequenceLength)); - } - if(WS_load_index && dbName && audiodb_index_exists(dbName, radius, sequenceLength)){ - char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength); - fprintf(stderr, "Loading LSH hashtables: %s...\n", indexName); - lsh = new LSH(indexName, true); - assert(lsh); - SERVER_LSH_INDEX_SINGLETON = lsh; - fprintf(stderr, "LSH INDEX READY\n"); - fflush(stderr); - delete[] indexName; - } - + /* FIXME: we used to have a global cache of a single LSH index + * here. CSR removed it because it interacted badly with + * APIification of querying, replacing it with a per-open-adb + * cache; we should try to take advantage of that instead. + */ + // Server-side path prefix to databases and features if(adb_root) SERVER_ADB_ROOT = (char*)adb_root; // Server-side database root