Mercurial > hg > audiodb
changeset 459:fcc6f7c4856b api-inversion
No more global shingle vector of vectors.
Convert audioDB::index_initialize_shingles and
audioDB::index_norm_shingles to plain old functions. In doing so, the
latter in particular acquires a silly argument list; we need that
complexity for now because it's called both from audioDB::query (which
we're currently inverting) and from audioDB::index (which is out of
scope for now).
Removing the global vv member prompted a check of memory discipline
[hence the new API function audiodb_query_free_results() as well as the
internal audiodb_index_delete_shingles()]. It's not too bad, but there
are still plenty of leaks to find for anyone with time to run
AUDIODB="valgrind --leak-check=full ../../audioDB" sh ./run-test.sh
on their favourite test case. For example, the Radius reporters leak
one triple per hit.
(Honestly, C++ memory management is teh suck.)
author | mas01cr |
---|---|
date | Sun, 28 Dec 2008 22:43:50 +0000 |
parents | 913a95f06998 |
children | 17003dff8127 |
files | audioDB.cpp audioDB.h audioDB_API.h index.cpp query.cpp |
diffstat | 5 files changed, 45 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB.cpp Sun Dec 28 18:44:08 2008 +0000 +++ b/audioDB.cpp Sun Dec 28 22:43:50 2008 +0000 @@ -219,8 +219,6 @@ delete reporter; if(rng) gsl_rng_free(rng); - if(vv) - delete vv; if(infid>0) close(infid); if(adb && !UseApiError) {
--- a/audioDB.h Sun Dec 28 18:44:08 2008 +0000 +++ b/audioDB.h Sun Dec 28 22:43:50 2008 +0000 @@ -376,19 +376,13 @@ Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row - - // LSH vector<> containers for one in-core copy of a set of feature vectors - vector<vector<float> > *vv; // one-track's worth data - // LSH indexing and retrieval methods void index_index_db(const char* dbName); void index_initialize(double**,double**,double**,double**,unsigned int*); void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp); Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp); - int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp); int index_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate); - vector<vector<float> >* index_initialize_shingles(Uns32T sz); int index_init_query(const char* dbName); int index_exists(const char* dbName, double radius, Uns32T sequenceLength); char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength); @@ -483,6 +477,5 @@ lsh_param_m(0), \ lsh_param_N(0), \ lsh_param_b(0), \ - lsh_param_ncols(0), \ - vv(0) + lsh_param_ncols(0) #endif
--- a/audioDB_API.h Sun Dec 28 18:44:08 2008 +0000 +++ b/audioDB_API.h Sun Dec 28 22:43:50 2008 +0000 @@ -188,6 +188,7 @@ /* query function */ int audiodb_query(adb_ptr mydb, adb_query_ptr adbq, adb_queryresult_ptr adbqres); int audiodb_query_spec(adb_t *, adb_query_spec_t *, adb_query_results_t *); +int audiodb_query_free_results(adb_t *, adb_query_spec_t *, adb_query_results_t *); /* database status */ int audiodb_status(adb_ptr mydb, adb_status_ptr status);
--- a/index.cpp Sun Dec 28 18:44:08 2008 +0000 +++ b/index.cpp Sun Dec 28 22:43:50 2008 +0000 @@ -62,15 +62,18 @@ return audioDB::lsh; } -vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){ - if(vv) - delete vv; - vv = new vector<vector<float> >(sz); - for(Uns32T i=0 ; i < sz ; i++) - (*vv)[i]=vector<float>(dbH->dim*sequenceLength); // allocate shingle storage +vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) { + std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz); + for(Uns32T i=0 ; i < sz ; i++) { + (*vv)[i]=vector<float>(dim * seqLen); + } return vv; } +void audiodb_index_delete_shingles(vector<vector<float> > *vv) { + delete vv; +} + /******************** LSH indexing audioDB database access forall s \in {S} ***********************/ // Prepare the AudioDB database for read access and allocate auxillary memory @@ -154,26 +157,28 @@ // norm shingles // in-place norming, no deletions // If using power, return number of shingles above power threshold -int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){ +int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) { int z = 0; // number of above-threshold shingles float l2norm; double power; float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 float oneOverSqrtl2NormDivRad = oneOverRadius; - if(!spp) - error("LSH indexing and query requires a power feature using -w or -W"); - Uns32T shingleSize = sequenceLength*dbH->dim; + Uns32T shingleSize = seqLen * dim; + + if(!spp) { + return -1; + } for(Uns32T a=0; a<(*vv).size(); a++){ l2norm = (float)(*snp++); - if(audioDB::normalizedDistance) + if(normed_vectors) oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; for(Uns32T b=0; b < shingleSize ; b++) (*vv)[a][b]*=oneOverSqrtl2NormDivRad; power = 
*spp++; - if(use_absolute_threshold){ - if ( power >= absolute_threshold ) + if(use_pthreshold){ + if (power >= pthreshold) z++; } else @@ -438,17 +443,23 @@ Uns32T numVecsAboveThreshold = 0, collisionCount = 0; if(numVecs){ - vv = index_initialize_shingles(numVecs); + std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(numVecs, dbH->dim, sequenceLength); for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ ) audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength); - - numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp); + int vcount = audiodb_index_norm_shingles(vv, *snpp, *sppp, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold); + if(vcount == -1) { + audiodb_index_delete_shingles(vv); + error("failed to norm shingles"); + } + numVecsAboveThreshold = vcount; collisionCount = index_insert_shingles(vv, trackID, *sppp); + audiodb_index_delete_shingles(vv); } + float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0; - /* index_norm_shingles() only goes as far as the end of the + /* audiodb_index_norm_shingles() only goes as far as the end of the sequence, which is right, but the space allocated is for the whole track. 
*/ @@ -594,14 +605,19 @@ // query vector index Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1; - vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles + std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequenceLength); // allocate memory to copy query vectors to shingles // Construct shingles from query features for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ ) audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength); // Normalize query vectors - Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qpointers.l2norm, qpointers.power ); + int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold); + if(vcount == -1) { + audiodb_index_delete_shingles(vv); + error("failed to norm shingles"); + } + Uns32T numVecsAboveThreshold = vcount; // Nq contains number of inspected points in query file, // numVecsAboveThreshold is number of points with power >= absolute_threshold @@ -621,6 +637,7 @@ lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); } } + audiodb_index_delete_shingles(vv); if(lsh_exact) // Perform exact distance computation on point pairs in exact_evaluation_queue
--- a/query.cpp Sun Dec 28 18:44:08 2008 +0000 +++ b/query.cpp Sun Dec 28 22:43:50 2008 +0000 @@ -258,10 +258,17 @@ adb_result_t r = rs->results[k]; reporter->add_point(audiodb_key_index(adb, r.key), r.qpos, r.ipos, r.dist); } + audiodb_query_free_results(adb, &qspec, rs); reporter->report(fileTable, adbQueryResponse); } +int audiodb_query_free_results(adb_t *adb, adb_query_spec_t *spec, adb_query_results_t *rs) { + free(rs->results); + free(rs); + return 0; +} + static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { unsigned int j, k, l, w; double *dp, *qp, *sp;