annotate query-indexed.cpp @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents a35ca2d5f238
children
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@509 4 #include "audioDB-internals.h"
mas01cr@589 5 #include "lshlib.h"
mas01cr@509 6
mas01cr@509 7 /*
mas01cr@509 8 * Routines and datastructures which are specific to indexed queries.
mas01cr@509 9 */
mas01cr@509 10 typedef struct adb_qcallback {
mas01cr@509 11 adb_t *adb;
mas01cr@509 12 adb_qstate_internal_t *qstate;
mas01cr@509 13 } adb_qcallback_t;
mas01cr@509 14
mas01cr@509 15 // return true if indexed query performed else return false
mas01cr@509 16 int audiodb_index_init_query(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) {
mas01cr@509 17
mas01cr@509 18 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@509 19 double radius = spec->refine.radius;
mas01cr@509 20 if(!(audiodb_index_exists(adb->path, radius, sequence_length)))
mas01cr@509 21 return false;
mas01cr@509 22
mas01cr@509 23 char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length);
mas01cr@509 24 if(!indexName) {
mas01cr@509 25 return false;
mas01cr@509 26 }
mas01cr@509 27
mas01cr@509 28 qstate->lsh = audiodb_index_allocate(adb, indexName, corep);
mas01cr@672 29 qstate->qkey = spec->qid.datum->key;
mas01cr@509 30
mas01cr@509 31 /* FIXME: it would be nice if the LSH library didn't make me do
mas01cr@509 32 * this. */
mas01cr@509 33 if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) {
mas01cr@509 34 delete qstate->lsh;
mas01cr@509 35 qstate->lsh = audiodb_index_allocate(adb, indexName, true);
mas01mc@513 36 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@513 37 qstate->lsh->dump_hashtables();
mas01mc@513 38 #endif
mas01cr@509 39 }
mas01cr@509 40
mas01cr@509 41 delete[] indexName;
mas01cr@509 42 return true;
mas01cr@509 43 }
mas01cr@509 44
mas01cr@589 45 void audiodb_index_add_point_approximate(void *user_data, uint32_t pointID, uint32_t qpos, float dist) {
mas01cr@509 46 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
mas01cr@509 47 adb_t *adb = data->adb;
mas01cr@509 48 adb_qstate_internal_t *qstate = data->qstate;
mas01mc@534 49 uint32_t trackID = audiodb_index_to_track_id(adb, pointID);
mas01mc@534 50 uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID);
mas01cr@509 51 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@509 52 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
mas01cr@509 53 adb_result_t r;
mas01cr@672 54 r.ikey = (*adb->keys)[trackID].c_str();
mas01cr@672 55 r.qkey = qstate->qkey;
mas01cr@509 56 r.dist = dist;
mas01cr@509 57 r.qpos = qpos;
mas01cr@509 58 r.ipos = spos;
mas01cr@610 59 if(qstate->set->find(r) == qstate->set->end()) {
mas01cr@610 60 qstate->set->insert(r);
mas01cr@610 61 qstate->accumulator->add_point(&r);
mas01cr@610 62 }
mas01cr@509 63 }
mas01cr@509 64 }
mas01cr@509 65
mas01cr@509 66 // Maintain a queue of points to pass to audiodb_query_queue_loop()
mas01cr@509 67 // for exact evaluation
mas01cr@589 68 void audiodb_index_add_point_exact(void *user_data, uint32_t pointID, uint32_t qpos, float dist) {
mas01cr@509 69 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
mas01cr@509 70 adb_t *adb = data->adb;
mas01cr@509 71 adb_qstate_internal_t *qstate = data->qstate;
mas01mc@534 72 uint32_t trackID = audiodb_index_to_track_id(adb, pointID);
mas01mc@534 73 uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID);
mas01cr@509 74 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@509 75 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
mas01cr@509 76 PointPair p(trackID, qpos, spos);
mas01cr@509 77 qstate->exact_evaluation_queue->push(p);
mas01cr@509 78 }
mas01cr@509 79 }
mas01cr@509 80
mas01cr@509 81 // return -1 on error
mas01cr@509 82 // return 0: if index does not exist
mas01cr@509 83 // return nqv: if index exists
mas01cr@509 84 int audiodb_index_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01mc@534 85 if(adb->header->flags>>28)
mas01mc@534 86 cerr << "WARNING: Database created using deprecated LSH_N_POINT_BITS coding: REBUILD INDEXES..." << endl;
mas01mc@534 87
mas01cr@509 88 double *query = 0, *query_data = 0;
mas01cr@509 89 adb_qpointers_internal_t qpointers = {0};
mas01cr@509 90
mas01cr@509 91 adb_qcallback_t callback_data;
mas01cr@509 92 callback_data.adb = adb;
mas01cr@509 93 callback_data.qstate = qstate;
mas01cr@509 94
mas01cr@509 95 void (*add_point_func)(void *, uint32_t, uint32_t, float);
mas01cr@509 96
mas01cr@509 97 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@509 98 bool normalized = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED);
mas01cr@509 99 double radius = spec->refine.radius;
mas01cr@509 100 bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@509 101 double absolute_threshold = spec->refine.absolute_threshold;
mas01cr@509 102
mas01cr@610 103 qstate->set = new std::set< adb_result_t, adb_result_triple_lt >;
mas01cr@610 104
mas01cr@509 105 if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) {
mas01cr@509 106 add_point_func = &audiodb_index_add_point_approximate;
mas01cr@509 107 } else {
mas01cr@509 108 qstate->exact_evaluation_queue = new std::priority_queue<PointPair>;
mas01cr@509 109 add_point_func = &audiodb_index_add_point_exact;
mas01cr@509 110 }
mas01cr@509 111
mas01cr@509 112 /* FIXME: this hardwired lsh_in_core is here to allow for a
mas01cr@509 113 * transition period while the need for the argument is worked
mas01cr@509 114 * through. Hopefully it will disappear again eventually. */
mas01cr@509 115 bool lsh_in_core = true;
mas01cr@509 116
mas01cr@509 117 if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) {
mas01cr@509 118 return 0;
mas01cr@509 119 }
mas01cr@509 120
mas01cr@509 121 char *database = audiodb_index_get_name(adb->path, radius, sequence_length);
mas01cr@509 122 if(!database) {
mas01cr@509 123 return -1;
mas01cr@509 124 }
mas01cr@509 125
mas01cr@509 126 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@509 127 delete [] database;
mas01cr@509 128 return -1;
mas01cr@509 129 }
mas01cr@509 130
mas01mc@534 131 uint32_t Nq = qpointers.nvectors - sequence_length + 1;
mas01cr@509 132 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length);
mas01cr@509 133
mas01cr@509 134 // Construct shingles from query features
mas01cr@509 135 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
mas01cr@509 136 audiodb_index_make_shingle(vv, pointID, query, adb->header->dim, sequence_length);
mas01cr@509 137 }
mas01cr@509 138
mas01cr@509 139 // Normalize query vectors
mas01cr@509 140 int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, adb->header->dim, sequence_length, radius, normalized, use_absolute_threshold, absolute_threshold);
mas01cr@509 141 if(vcount == -1) {
mas01cr@509 142 audiodb_index_delete_shingles(vv);
mas01cr@509 143 delete [] database;
mas01cr@509 144 return -1;
mas01cr@509 145 }
mas01cr@509 146 uint32_t numVecsAboveThreshold = vcount;
mas01cr@509 147
mas01cr@509 148 // Nq contains number of inspected points in query file,
mas01cr@509 149 // numVecsAboveThreshold is number of points with power >= absolute_threshold
mas01cr@509 150 double *qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
mas01cr@509 151 if(!(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) && numVecsAboveThreshold) {
mas01cr@509 152 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01cr@509 153 qstate->lsh->retrieve_point((*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
mas01cr@509 154 } else {
mas01cr@509 155 qstate->lsh->serial_retrieve_point(database, (*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
mas01cr@509 156 }
mas01cr@509 157 } else if(numVecsAboveThreshold) {
mas01cr@509 158 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
mas01cr@509 159 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
mas01cr@509 160 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01cr@509 161 qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
mas01cr@509 162 } else {
mas01cr@509 163 qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);
mas01cr@509 164 }
mas01cr@509 165 }
mas01cr@509 166 }
mas01cr@509 167 }
mas01cr@509 168 audiodb_index_delete_shingles(vv);
mas01cr@509 169
mas01cr@509 170 if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) {
mas01cr@509 171 audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers);
mas01cr@509 172 }
mas01cr@610 173
mas01cr@610 174 delete qstate->set;
mas01cr@610 175
mas01cr@509 176
mas01cr@509 177 // Clean up
mas01cr@509 178 if(query_data)
mas01cr@509 179 delete[] query_data;
mas01cr@509 180 if(qpointers.l2norm_data)
mas01cr@509 181 delete[] qpointers.l2norm_data;
mas01cr@509 182 if(qpointers.power_data)
mas01cr@509 183 delete[] qpointers.power_data;
mas01cr@509 184 if(qpointers.mean_duration)
mas01cr@509 185 delete[] qpointers.mean_duration;
mas01cr@509 186 if(database)
mas01cr@509 187 delete[] database;
mas01cr@509 188 if(qstate->lsh != adb->cached_lsh)
mas01cr@509 189 delete qstate->lsh;
mas01cr@509 190
mas01cr@509 191 return Nq;
mas01cr@509 192 }