Mercurial > hg > audiodb
diff index.cpp @ 509:cc2b97d020b1
Code rearrangements to tease apart library code from C++ audioDB code.
There should be precisely no functional changes in this commit.
Instead, the only thing that has happened is that all the abstraction
violation and other horribleness is concentrated in one place: the
include of "audioDB-internals.h" in audioDB.h -- the separation will be
complete once that include can be removed.
This include is necessary because the command-line binary / SOAP server
still does some things directly rather than through an API: not least of
which are the operations that have not been integrated into the API yet,
but also some messing around with constants, flags and nominally
internal functions. The intent is to remove as many of these as
possible and think quite hard about the rest.
In the meantime, the library is now much more self-contained: the only
things it uses are in the audioDB_API.h and audioDB-internals.h headers;
thus there are fewer nasty surprises lurking for readers of the code.
The Makefile has been adjusted to take advantage of this rearrangement
in the dependencies.
author | mas01cr |
---|---|
date | Thu, 15 Jan 2009 13:57:33 +0000 |
parents | 342822c2d49a |
children | 06409b6e268f |
line wrap: on
line diff
--- a/index.cpp Tue Jan 13 21:37:14 2009 +0000 +++ b/index.cpp Thu Jan 15 13:57:33 2009 +0000 @@ -12,88 +12,8 @@ // 19th August 2008 - added O2_FLAG_LARGE_ADB support #include "audioDB.h" -#include "audioDB-internals.h" -typedef struct adb_qcallback { - adb_t *adb; - adb_qstate_internal_t *qstate; -} adb_qcallback_t; - -/************************* LSH indexing and query initialization *****************/ - -/* FIXME: there are several things wrong with this: the memory - * discipline isn't ideal, the radius printing is a bit lame, the name - * getting will succeed or fail depending on whether the path was - * relative or absolute -- but most importantly encoding all that - * information in a filename is going to lose: it's impossible to - * maintain backwards-compatibility. Instead we should probably store - * the index metadata inside the audiodb instance. */ -char *audiodb_index_get_name(const char *dbName, double radius, Uns32T sequenceLength) { - char *indexName; - if(strlen(dbName) > (MAXSTR - 32)) { - return NULL; - } - indexName = new char[MAXSTR]; - strncpy(indexName, dbName, MAXSTR); - sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength); - return indexName; -} - -bool audiodb_index_exists(const char *dbName, double radius, Uns32T sequenceLength) { - char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength); - if(!indexName) { - return false; - } - struct stat st; - if(stat(indexName, &st)) { - delete [] indexName; - return false; - } - /* FIXME: other stat checks here? */ - /* FIXME: is there any better way to check whether we can open a - * file for reading than by opening a file for reading? */ - int fd = open(indexName, O_RDONLY); - delete [] indexName; - if(fd < 0) { - return false; - } else { - close(fd); - return true; - } -} - -/* FIXME: the indexName arg should be "const char *", but the LSH - * library doesn't like that. 
- */ -LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) { - LSH *lsh; - if(adb->cached_lsh) { - if(!strncmp(adb->cached_lsh->get_indexName(), indexName, MAXSTR)) { - return adb->cached_lsh; - } else { - delete adb->cached_lsh; - } - } - lsh = new LSH(indexName, load_tables); - if(load_tables) { - adb->cached_lsh = lsh; - } - return lsh; -} - -vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) { - std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz); - for(Uns32T i=0 ; i < sz ; i++) { - (*vv)[i]=vector<float>(dim * seqLen); - } - return vv; -} - -void audiodb_index_delete_shingles(vector<vector<float> > *vv) { - delete vv; -} - -/******************** LSH indexing audioDB database access forall s \in {S} ***********************/ +/******* LSH indexing audioDB database access forall s \in {S} *******/ // Prepare the AudioDB database for read access and allocate auxillary memory void audioDB::index_initialize(double **snp, double **vsnp, double **spp, double **vspp, Uns32T *dvp) { @@ -132,81 +52,6 @@ lseek(dbfid, dbH->dataOffset, SEEK_SET); } - -/********************* LSH shingle construction ***************************/ - -// Construct shingles out of a feature matrix -// inputs: -// idx is vector index in feature matrix -// fvp is base feature matrix pointer double* [numVecs x dbH->dim] -// -// pre-conditions: -// dbH->dim -// sequenceLength -// idx < numVectors - sequenceLength + 1 -// -// post-conditions: -// (*vv)[idx] contains a shingle with dbH->dim*sequenceLength float values - -static void audiodb_index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){ - assert(idx<(*vv).size()); - vector<float>::iterator ve = (*vv)[idx].end(); - vector<float>::iterator vi = (*vv)[idx].begin(); - // First feature vector in shingle - if(idx == 0) { - while(vi!=ve) { - *vi++ = (float)(*fvp++); - } - } else { - // Not first feature vector in shingle - 
vector<float>::iterator ui=(*vv)[idx-1].begin() + dim; - // Previous seqLen-1 dim-vectors - while(vi!=ve-dim) { - *vi++ = *ui++; - } - // Move data pointer to next feature vector - fvp += ( seqLen + idx - 1 ) * dim ; - // New d-vector - while(vi!=ve) { - *vi++ = (float)(*fvp++); - } - } -} - -// norm shingles -// in-place norming, no deletions -// If using power, return number of shingles above power threshold -int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) { - int z = 0; // number of above-threshold shingles - float l2norm; - double power; - float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 - float oneOverSqrtl2NormDivRad = oneOverRadius; - Uns32T shingleSize = seqLen * dim; - - if(!spp) { - return -1; - } - for(Uns32T a=0; a<(*vv).size(); a++){ - l2norm = (float)(*snp++); - if(normed_vectors) - oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; - - for(Uns32T b=0; b < shingleSize ; b++) - (*vv)[a][b]*=oneOverSqrtl2NormDivRad; - - power = *spp++; - if(use_pthreshold){ - if (power >= pthreshold) - z++; - } - else - z++; - } - return z; -} - - /************************ LSH indexing ***********************************/ void audioDB::index_index_db(const char* dbName){ char* newIndexName; @@ -511,173 +356,3 @@ } return collisionCount; } - -/*********************** LSH retrieval ****************************/ - - -// return true if indexed query performed else return false -int audiodb_index_init_query(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) { - - uint32_t sequence_length = spec->qid.sequence_length; - double radius = spec->refine.radius; - if(!(audiodb_index_exists(adb->path, radius, sequence_length))) - return false; - - char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length); - if(!indexName) { - return false; - } - - qstate->lsh 
= audiodb_index_allocate(adb, indexName, corep); - - /* FIXME: it would be nice if the LSH library didn't make me do - * this. */ - if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) { - delete qstate->lsh; - qstate->lsh = audiodb_index_allocate(adb, indexName, true); - } - - delete[] indexName; - return true; -} - -void audiodb_index_add_point_approximate(void *user_data, Uns32T pointID, Uns32T qpos, float dist) { - adb_qcallback_t *data = (adb_qcallback_t *) user_data; - adb_t *adb = data->adb; - adb_qstate_internal_t *qstate = data->qstate; - uint32_t nbits = audiodb_lsh_n_point_bits(adb); - uint32_t trackID = audiodb_index_to_track_id(pointID, nbits); - uint32_t spos = audiodb_index_to_track_pos(pointID, nbits); - std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); - if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { - adb_result_t r; - r.key = (*adb->keys)[trackID].c_str(); - r.dist = dist; - r.qpos = qpos; - r.ipos = spos; - qstate->accumulator->add_point(&r); - } -} - -// Maintain a queue of points to pass to audiodb_query_queue_loop() -// for exact evaluation -void audiodb_index_add_point_exact(void *user_data, Uns32T pointID, Uns32T qpos, float dist) { - adb_qcallback_t *data = (adb_qcallback_t *) user_data; - adb_t *adb = data->adb; - adb_qstate_internal_t *qstate = data->qstate; - uint32_t nbits = audiodb_lsh_n_point_bits(adb); - uint32_t trackID = audiodb_index_to_track_id(pointID, nbits); - uint32_t spos = audiodb_index_to_track_pos(pointID, nbits); - std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); - if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { - PointPair p(trackID, qpos, spos); - qstate->exact_evaluation_queue->push(p); - } -} - -// return -1 on error -// return 0: if index does not exist -// return nqv: if index exists -int audiodb_index_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) { - - double *query 
= 0, *query_data = 0; - adb_qpointers_internal_t qpointers = {0}; - - adb_qcallback_t callback_data; - callback_data.adb = adb; - callback_data.qstate = qstate; - - void (*add_point_func)(void *, uint32_t, uint32_t, float); - - uint32_t sequence_length = spec->qid.sequence_length; - bool normalized = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED); - double radius = spec->refine.radius; - bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD; - double absolute_threshold = spec->refine.absolute_threshold; - - if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) { - add_point_func = &audiodb_index_add_point_approximate; - } else { - qstate->exact_evaluation_queue = new std::priority_queue<PointPair>; - add_point_func = &audiodb_index_add_point_exact; - } - - /* FIXME: this hardwired lsh_in_core is here to allow for a - * transition period while the need for the argument is worked - * through. Hopefully it will disappear again eventually. */ - bool lsh_in_core = true; - - if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) { - return 0; - } - - char *database = audiodb_index_get_name(adb->path, radius, sequence_length); - if(!database) { - return -1; - } - - if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { - delete [] database; - return -1; - } - - uint32_t Nq = (qpointers.nvectors > O2_MAXTRACKLEN ? 
O2_MAXTRACKLEN : qpointers.nvectors) - sequence_length + 1; - std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length); - - // Construct shingles from query features - for(uint32_t pointID = 0; pointID < Nq; pointID++) { - audiodb_index_make_shingle(vv, pointID, query, adb->header->dim, sequence_length); - } - - // Normalize query vectors - int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, adb->header->dim, sequence_length, radius, normalized, use_absolute_threshold, absolute_threshold); - if(vcount == -1) { - audiodb_index_delete_shingles(vv); - delete [] database; - return -1; - } - uint32_t numVecsAboveThreshold = vcount; - - // Nq contains number of inspected points in query file, - // numVecsAboveThreshold is number of points with power >= absolute_threshold - double *qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation - if(!(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) && numVecsAboveThreshold) { - if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) { - qstate->lsh->retrieve_point((*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data); - } else { - qstate->lsh->serial_retrieve_point(database, (*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data); - } - } else if(numVecsAboveThreshold) { - for(uint32_t pointID = 0; pointID < Nq; pointID++) { - if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) { - if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) { - qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); - } else { - qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); - } - } - } - } - audiodb_index_delete_shingles(vv); - - if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) { - audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers); - 
} - - // Clean up - if(query_data) - delete[] query_data; - if(qpointers.l2norm_data) - delete[] qpointers.l2norm_data; - if(qpointers.power_data) - delete[] qpointers.power_data; - if(qpointers.mean_duration) - delete[] qpointers.mean_duration; - if(database) - delete[] database; - if(qstate->lsh != adb->cached_lsh) - delete qstate->lsh; - - return Nq; -} -