annotate query-indexed.cpp @ 509:cc2b97d020b1

Code rearrangements to tease apart library code from C++ audioDB code. There should be precisely no functional changes in this commit. Instead, the only thing that has happened is that all the abstraction violations and other horribleness are now concentrated in one place: the include of "audioDB-internals.h" in audioDB.h -- the separation will be complete once that include can be removed. This include is necessary because the command-line binary / SOAP server still does some things directly rather than through an API: not least the operations that have not yet been integrated into the API, but also some messing around with constants, flags and nominally internal functions. The intent is to remove as many of these as possible and think quite hard about the rest. In the meantime, the library is now much more self-contained: the only things it uses are in the audioDB_API.h and audioDB-internals.h headers; thus there are fewer nasty surprises lurking for readers of the code. The Makefile has been adjusted to take advantage of this rearrangement in the dependencies.
author mas01cr
date Thu, 15 Jan 2009 13:57:33 +0000
parents
children a30948382f56
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@509 4 #include "audioDB-internals.h"
mas01cr@509 5
mas01cr@509 6 /*
mas01cr@509 7 * Routines and datastructures which are specific to indexed queries.
mas01cr@509 8 */
mas01cr@509 9 typedef struct adb_qcallback {
mas01cr@509 10 adb_t *adb;
mas01cr@509 11 adb_qstate_internal_t *qstate;
mas01cr@509 12 } adb_qcallback_t;
mas01cr@509 13
mas01cr@509 14 // return true if indexed query performed else return false
mas01cr@509 15 int audiodb_index_init_query(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) {
mas01cr@509 16
mas01cr@509 17 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@509 18 double radius = spec->refine.radius;
mas01cr@509 19 if(!(audiodb_index_exists(adb->path, radius, sequence_length)))
mas01cr@509 20 return false;
mas01cr@509 21
mas01cr@509 22 char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length);
mas01cr@509 23 if(!indexName) {
mas01cr@509 24 return false;
mas01cr@509 25 }
mas01cr@509 26
mas01cr@509 27 qstate->lsh = audiodb_index_allocate(adb, indexName, corep);
mas01cr@509 28
mas01cr@509 29 /* FIXME: it would be nice if the LSH library didn't make me do
mas01cr@509 30 * this. */
mas01cr@509 31 if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) {
mas01cr@509 32 delete qstate->lsh;
mas01cr@509 33 qstate->lsh = audiodb_index_allocate(adb, indexName, true);
mas01cr@509 34 }
mas01cr@509 35
mas01cr@509 36 delete[] indexName;
mas01cr@509 37 return true;
mas01cr@509 38 }
mas01cr@509 39
mas01cr@509 40 void audiodb_index_add_point_approximate(void *user_data, Uns32T pointID, Uns32T qpos, float dist) {
mas01cr@509 41 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
mas01cr@509 42 adb_t *adb = data->adb;
mas01cr@509 43 adb_qstate_internal_t *qstate = data->qstate;
mas01cr@509 44 uint32_t nbits = audiodb_lsh_n_point_bits(adb);
mas01cr@509 45 uint32_t trackID = audiodb_index_to_track_id(pointID, nbits);
mas01cr@509 46 uint32_t spos = audiodb_index_to_track_pos(pointID, nbits);
mas01cr@509 47 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@509 48 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
mas01cr@509 49 adb_result_t r;
mas01cr@509 50 r.key = (*adb->keys)[trackID].c_str();
mas01cr@509 51 r.dist = dist;
mas01cr@509 52 r.qpos = qpos;
mas01cr@509 53 r.ipos = spos;
mas01cr@509 54 qstate->accumulator->add_point(&r);
mas01cr@509 55 }
mas01cr@509 56 }
mas01cr@509 57
mas01cr@509 58 // Maintain a queue of points to pass to audiodb_query_queue_loop()
mas01cr@509 59 // for exact evaluation
mas01cr@509 60 void audiodb_index_add_point_exact(void *user_data, Uns32T pointID, Uns32T qpos, float dist) {
mas01cr@509 61 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
mas01cr@509 62 adb_t *adb = data->adb;
mas01cr@509 63 adb_qstate_internal_t *qstate = data->qstate;
mas01cr@509 64 uint32_t nbits = audiodb_lsh_n_point_bits(adb);
mas01cr@509 65 uint32_t trackID = audiodb_index_to_track_id(pointID, nbits);
mas01cr@509 66 uint32_t spos = audiodb_index_to_track_pos(pointID, nbits);
mas01cr@509 67 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@509 68 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
mas01cr@509 69 PointPair p(trackID, qpos, spos);
mas01cr@509 70 qstate->exact_evaluation_queue->push(p);
mas01cr@509 71 }
mas01cr@509 72 }
mas01cr@509 73
mas01cr@509 74 // return -1 on error
mas01cr@509 75 // return 0: if index does not exist
mas01cr@509 76 // return nqv: if index exists
mas01cr@509 77 int audiodb_index_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@509 78
mas01cr@509 79 double *query = 0, *query_data = 0;
mas01cr@509 80 adb_qpointers_internal_t qpointers = {0};
mas01cr@509 81
mas01cr@509 82 adb_qcallback_t callback_data;
mas01cr@509 83 callback_data.adb = adb;
mas01cr@509 84 callback_data.qstate = qstate;
mas01cr@509 85
mas01cr@509 86 void (*add_point_func)(void *, uint32_t, uint32_t, float);
mas01cr@509 87
mas01cr@509 88 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@509 89 bool normalized = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED);
mas01cr@509 90 double radius = spec->refine.radius;
mas01cr@509 91 bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@509 92 double absolute_threshold = spec->refine.absolute_threshold;
mas01cr@509 93
mas01cr@509 94 if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) {
mas01cr@509 95 add_point_func = &audiodb_index_add_point_approximate;
mas01cr@509 96 } else {
mas01cr@509 97 qstate->exact_evaluation_queue = new std::priority_queue<PointPair>;
mas01cr@509 98 add_point_func = &audiodb_index_add_point_exact;
mas01cr@509 99 }
mas01cr@509 100
mas01cr@509 101 /* FIXME: this hardwired lsh_in_core is here to allow for a
mas01cr@509 102 * transition period while the need for the argument is worked
mas01cr@509 103 * through. Hopefully it will disappear again eventually. */
mas01cr@509 104 bool lsh_in_core = true;
mas01cr@509 105
mas01cr@509 106 if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) {
mas01cr@509 107 return 0;
mas01cr@509 108 }
mas01cr@509 109
mas01cr@509 110 char *database = audiodb_index_get_name(adb->path, radius, sequence_length);
mas01cr@509 111 if(!database) {
mas01cr@509 112 return -1;
mas01cr@509 113 }
mas01cr@509 114
mas01cr@509 115 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@509 116 delete [] database;
mas01cr@509 117 return -1;
mas01cr@509 118 }
mas01cr@509 119
mas01cr@509 120 uint32_t Nq = (qpointers.nvectors > ADB_LSH_MAXTRACKLEN ? ADB_LSH_MAXTRACKLEN : qpointers.nvectors) - sequence_length + 1;
mas01cr@509 121 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length);
mas01cr@509 122
mas01cr@509 123 // Construct shingles from query features
mas01cr@509 124 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
mas01cr@509 125 audiodb_index_make_shingle(vv, pointID, query, adb->header->dim, sequence_length);
mas01cr@509 126 }
mas01cr@509 127
mas01cr@509 128 // Normalize query vectors
mas01cr@509 129 int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, adb->header->dim, sequence_length, radius, normalized, use_absolute_threshold, absolute_threshold);
mas01cr@509 130 if(vcount == -1) {
mas01cr@509 131 audiodb_index_delete_shingles(vv);
mas01cr@509 132 delete [] database;
mas01cr@509 133 return -1;
mas01cr@509 134 }
mas01cr@509 135 uint32_t numVecsAboveThreshold = vcount;
mas01cr@509 136
mas01cr@509 137 // Nq contains number of inspected points in query file,
mas01cr@509 138 // numVecsAboveThreshold is number of points with power >= absolute_threshold
mas01cr@509 139 double *qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
mas01cr@509 140 if(!(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) && numVecsAboveThreshold) {
mas01cr@509 141 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01cr@509 142 qstate->lsh->retrieve_point((*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
mas01cr@509 143 } else {
mas01cr@509 144 qstate->lsh->serial_retrieve_point(database, (*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
mas01cr@509 145 }
mas01cr@509 146 } else if(numVecsAboveThreshold) {
mas01cr@509 147 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
mas01cr@509 148 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
mas01cr@509 149 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01cr@509 150 qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
mas01cr@509 151 } else {
mas01cr@509 152 qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);
mas01cr@509 153 }
mas01cr@509 154 }
mas01cr@509 155 }
mas01cr@509 156 }
mas01cr@509 157 audiodb_index_delete_shingles(vv);
mas01cr@509 158
mas01cr@509 159 if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) {
mas01cr@509 160 audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers);
mas01cr@509 161 }
mas01cr@509 162
mas01cr@509 163 // Clean up
mas01cr@509 164 if(query_data)
mas01cr@509 165 delete[] query_data;
mas01cr@509 166 if(qpointers.l2norm_data)
mas01cr@509 167 delete[] qpointers.l2norm_data;
mas01cr@509 168 if(qpointers.power_data)
mas01cr@509 169 delete[] qpointers.power_data;
mas01cr@509 170 if(qpointers.mean_duration)
mas01cr@509 171 delete[] qpointers.mean_duration;
mas01cr@509 172 if(database)
mas01cr@509 173 delete[] database;
mas01cr@509 174 if(qstate->lsh != adb->cached_lsh)
mas01cr@509 175 delete qstate->lsh;
mas01cr@509 176
mas01cr@509 177 return Nq;
mas01cr@509 178 }