annotate query-indexed.cpp @ 534:57e459f62788

Removed LSH_N_POINT_BITS coding for LSH index. Now uses binay search via STL lower_bound to locate tracks and positions from global pointID searching over cumulative track lengths. MERGE of branches/multiprobeLSH -r 819:821 onto trunk. This is a non backward-compatible change; WARNING generated on attempt to use INDEXING with older audioDB databases. Only INDEXES are broken, not ADB instances.
author mas01mc
date Wed, 04 Feb 2009 10:45:57 +0000
parents a30948382f56
children 9119f2fa3efe
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@509 4 #include "audioDB-internals.h"
mas01cr@509 5
mas01cr@509 6 /*
mas01cr@509 7 * Routines and datastructures which are specific to indexed queries.
mas01cr@509 8 */
mas01cr@509 9 typedef struct adb_qcallback {
mas01cr@509 10 adb_t *adb;
mas01cr@509 11 adb_qstate_internal_t *qstate;
mas01cr@509 12 } adb_qcallback_t;
mas01cr@509 13
mas01cr@509 14 // return true if indexed query performed else return false
mas01cr@509 15 int audiodb_index_init_query(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) {
mas01cr@509 16
mas01cr@509 17 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@509 18 double radius = spec->refine.radius;
mas01cr@509 19 if(!(audiodb_index_exists(adb->path, radius, sequence_length)))
mas01cr@509 20 return false;
mas01cr@509 21
mas01cr@509 22 char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length);
mas01cr@509 23 if(!indexName) {
mas01cr@509 24 return false;
mas01cr@509 25 }
mas01cr@509 26
mas01cr@509 27 qstate->lsh = audiodb_index_allocate(adb, indexName, corep);
mas01cr@509 28
mas01cr@509 29 /* FIXME: it would be nice if the LSH library didn't make me do
mas01cr@509 30 * this. */
mas01cr@509 31 if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) {
mas01cr@509 32 delete qstate->lsh;
mas01cr@509 33 qstate->lsh = audiodb_index_allocate(adb, indexName, true);
mas01mc@513 34 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@513 35 qstate->lsh->dump_hashtables();
mas01mc@513 36 #endif
mas01cr@509 37 }
mas01cr@509 38
mas01cr@509 39 delete[] indexName;
mas01cr@509 40 return true;
mas01cr@509 41 }
mas01cr@509 42
mas01cr@509 43 void audiodb_index_add_point_approximate(void *user_data, Uns32T pointID, Uns32T qpos, float dist) {
mas01cr@509 44 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
mas01cr@509 45 adb_t *adb = data->adb;
mas01cr@509 46 adb_qstate_internal_t *qstate = data->qstate;
mas01mc@534 47 uint32_t trackID = audiodb_index_to_track_id(adb, pointID);
mas01mc@534 48 uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID);
mas01cr@509 49 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@509 50 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
mas01cr@509 51 adb_result_t r;
mas01cr@509 52 r.key = (*adb->keys)[trackID].c_str();
mas01cr@509 53 r.dist = dist;
mas01cr@509 54 r.qpos = qpos;
mas01cr@509 55 r.ipos = spos;
mas01cr@509 56 qstate->accumulator->add_point(&r);
mas01cr@509 57 }
mas01cr@509 58 }
mas01cr@509 59
mas01cr@509 60 // Maintain a queue of points to pass to audiodb_query_queue_loop()
mas01cr@509 61 // for exact evaluation
mas01cr@509 62 void audiodb_index_add_point_exact(void *user_data, Uns32T pointID, Uns32T qpos, float dist) {
mas01cr@509 63 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
mas01cr@509 64 adb_t *adb = data->adb;
mas01cr@509 65 adb_qstate_internal_t *qstate = data->qstate;
mas01mc@534 66 uint32_t trackID = audiodb_index_to_track_id(adb, pointID);
mas01mc@534 67 uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID);
mas01cr@509 68 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@509 69 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
mas01cr@509 70 PointPair p(trackID, qpos, spos);
mas01cr@509 71 qstate->exact_evaluation_queue->push(p);
mas01cr@509 72 }
mas01cr@509 73 }
mas01cr@509 74
mas01cr@509 75 // return -1 on error
mas01cr@509 76 // return 0: if index does not exist
mas01cr@509 77 // return nqv: if index exists
mas01cr@509 78 int audiodb_index_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01mc@534 79 if(adb->header->flags>>28)
mas01mc@534 80 cerr << "WARNING: Database created using deprecated LSH_N_POINT_BITS coding: REBUILD INDEXES..." << endl;
mas01mc@534 81
mas01cr@509 82 double *query = 0, *query_data = 0;
mas01cr@509 83 adb_qpointers_internal_t qpointers = {0};
mas01cr@509 84
mas01cr@509 85 adb_qcallback_t callback_data;
mas01cr@509 86 callback_data.adb = adb;
mas01cr@509 87 callback_data.qstate = qstate;
mas01cr@509 88
mas01cr@509 89 void (*add_point_func)(void *, uint32_t, uint32_t, float);
mas01cr@509 90
mas01cr@509 91 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@509 92 bool normalized = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED);
mas01cr@509 93 double radius = spec->refine.radius;
mas01cr@509 94 bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@509 95 double absolute_threshold = spec->refine.absolute_threshold;
mas01cr@509 96
mas01cr@509 97 if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) {
mas01cr@509 98 add_point_func = &audiodb_index_add_point_approximate;
mas01cr@509 99 } else {
mas01cr@509 100 qstate->exact_evaluation_queue = new std::priority_queue<PointPair>;
mas01cr@509 101 add_point_func = &audiodb_index_add_point_exact;
mas01cr@509 102 }
mas01cr@509 103
mas01cr@509 104 /* FIXME: this hardwired lsh_in_core is here to allow for a
mas01cr@509 105 * transition period while the need for the argument is worked
mas01cr@509 106 * through. Hopefully it will disappear again eventually. */
mas01cr@509 107 bool lsh_in_core = true;
mas01cr@509 108
mas01cr@509 109 if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) {
mas01cr@509 110 return 0;
mas01cr@509 111 }
mas01cr@509 112
mas01cr@509 113 char *database = audiodb_index_get_name(adb->path, radius, sequence_length);
mas01cr@509 114 if(!database) {
mas01cr@509 115 return -1;
mas01cr@509 116 }
mas01cr@509 117
mas01cr@509 118 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@509 119 delete [] database;
mas01cr@509 120 return -1;
mas01cr@509 121 }
mas01cr@509 122
mas01mc@534 123 uint32_t Nq = qpointers.nvectors - sequence_length + 1;
mas01cr@509 124 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length);
mas01cr@509 125
mas01cr@509 126 // Construct shingles from query features
mas01cr@509 127 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
mas01cr@509 128 audiodb_index_make_shingle(vv, pointID, query, adb->header->dim, sequence_length);
mas01cr@509 129 }
mas01cr@509 130
mas01cr@509 131 // Normalize query vectors
mas01cr@509 132 int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, adb->header->dim, sequence_length, radius, normalized, use_absolute_threshold, absolute_threshold);
mas01cr@509 133 if(vcount == -1) {
mas01cr@509 134 audiodb_index_delete_shingles(vv);
mas01cr@509 135 delete [] database;
mas01cr@509 136 return -1;
mas01cr@509 137 }
mas01cr@509 138 uint32_t numVecsAboveThreshold = vcount;
mas01cr@509 139
mas01cr@509 140 // Nq contains number of inspected points in query file,
mas01cr@509 141 // numVecsAboveThreshold is number of points with power >= absolute_threshold
mas01cr@509 142 double *qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
mas01cr@509 143 if(!(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) && numVecsAboveThreshold) {
mas01cr@509 144 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01cr@509 145 qstate->lsh->retrieve_point((*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
mas01cr@509 146 } else {
mas01cr@509 147 qstate->lsh->serial_retrieve_point(database, (*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
mas01cr@509 148 }
mas01cr@509 149 } else if(numVecsAboveThreshold) {
mas01cr@509 150 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
mas01cr@509 151 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
mas01cr@509 152 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01cr@509 153 qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
mas01cr@509 154 } else {
mas01cr@509 155 qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);
mas01cr@509 156 }
mas01cr@509 157 }
mas01cr@509 158 }
mas01cr@509 159 }
mas01cr@509 160 audiodb_index_delete_shingles(vv);
mas01cr@509 161
mas01cr@509 162 if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) {
mas01cr@509 163 audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers);
mas01cr@509 164 }
mas01cr@509 165
mas01cr@509 166 // Clean up
mas01cr@509 167 if(query_data)
mas01cr@509 168 delete[] query_data;
mas01cr@509 169 if(qpointers.l2norm_data)
mas01cr@509 170 delete[] qpointers.l2norm_data;
mas01cr@509 171 if(qpointers.power_data)
mas01cr@509 172 delete[] qpointers.power_data;
mas01cr@509 173 if(qpointers.mean_duration)
mas01cr@509 174 delete[] qpointers.mean_duration;
mas01cr@509 175 if(database)
mas01cr@509 176 delete[] database;
mas01cr@509 177 if(qstate->lsh != adb->cached_lsh)
mas01cr@509 178 delete qstate->lsh;
mas01cr@509 179
mas01cr@509 180 return Nq;
mas01cr@509 181 }