mas01cr@509
|
1 extern "C" {
|
mas01cr@509
|
2 #include "audioDB_API.h"
|
mas01cr@509
|
3 }
|
mas01cr@509
|
4 #include "audioDB-internals.h"
|
mas01cr@589
|
5 #include "lshlib.h"
|
mas01cr@509
|
6
|
mas01cr@509
|
7 /*
|
mas01cr@509
|
8 * Routines and datastructures which are specific to indexed queries.
|
mas01cr@509
|
9 */
|
mas01cr@509
|
10 typedef struct adb_qcallback {
|
mas01cr@509
|
11 adb_t *adb;
|
mas01cr@509
|
12 adb_qstate_internal_t *qstate;
|
mas01cr@509
|
13 } adb_qcallback_t;
|
mas01cr@509
|
14
|
mas01cr@509
|
15 // return true if indexed query performed else return false
|
mas01cr@509
|
16 int audiodb_index_init_query(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) {
|
mas01cr@509
|
17
|
mas01cr@509
|
18 uint32_t sequence_length = spec->qid.sequence_length;
|
mas01cr@509
|
19 double radius = spec->refine.radius;
|
mas01cr@509
|
20 if(!(audiodb_index_exists(adb->path, radius, sequence_length)))
|
mas01cr@509
|
21 return false;
|
mas01cr@509
|
22
|
mas01cr@509
|
23 char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length);
|
mas01cr@509
|
24 if(!indexName) {
|
mas01cr@509
|
25 return false;
|
mas01cr@509
|
26 }
|
mas01cr@509
|
27
|
mas01cr@509
|
28 qstate->lsh = audiodb_index_allocate(adb, indexName, corep);
|
mas01cr@672
|
29 qstate->qkey = spec->qid.datum->key;
|
mas01cr@509
|
30
|
mas01cr@509
|
31 /* FIXME: it would be nice if the LSH library didn't make me do
|
mas01cr@509
|
32 * this. */
|
mas01cr@509
|
33 if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) {
|
mas01cr@509
|
34 delete qstate->lsh;
|
mas01cr@509
|
35 qstate->lsh = audiodb_index_allocate(adb, indexName, true);
|
mas01mc@513
|
36 #ifdef LSH_DUMP_CORE_TABLES
|
mas01mc@513
|
37 qstate->lsh->dump_hashtables();
|
mas01mc@513
|
38 #endif
|
mas01cr@509
|
39 }
|
mas01cr@509
|
40
|
mas01cr@509
|
41 delete[] indexName;
|
mas01cr@509
|
42 return true;
|
mas01cr@509
|
43 }
|
mas01cr@509
|
44
|
mas01cr@589
|
45 void audiodb_index_add_point_approximate(void *user_data, uint32_t pointID, uint32_t qpos, float dist) {
|
mas01cr@509
|
46 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
|
mas01cr@509
|
47 adb_t *adb = data->adb;
|
mas01cr@509
|
48 adb_qstate_internal_t *qstate = data->qstate;
|
mas01mc@534
|
49 uint32_t trackID = audiodb_index_to_track_id(adb, pointID);
|
mas01mc@534
|
50 uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID);
|
mas01cr@509
|
51 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
|
mas01cr@509
|
52 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
|
mas01cr@509
|
53 adb_result_t r;
|
mas01cr@672
|
54 r.ikey = (*adb->keys)[trackID].c_str();
|
mas01cr@672
|
55 r.qkey = qstate->qkey;
|
mas01cr@509
|
56 r.dist = dist;
|
mas01cr@509
|
57 r.qpos = qpos;
|
mas01cr@509
|
58 r.ipos = spos;
|
mas01cr@610
|
59 if(qstate->set->find(r) == qstate->set->end()) {
|
mas01cr@610
|
60 qstate->set->insert(r);
|
mas01cr@610
|
61 qstate->accumulator->add_point(&r);
|
mas01cr@610
|
62 }
|
mas01cr@509
|
63 }
|
mas01cr@509
|
64 }
|
mas01cr@509
|
65
|
mas01cr@509
|
66 // Maintain a queue of points to pass to audiodb_query_queue_loop()
|
mas01cr@509
|
67 // for exact evaluation
|
mas01cr@589
|
68 void audiodb_index_add_point_exact(void *user_data, uint32_t pointID, uint32_t qpos, float dist) {
|
mas01cr@509
|
69 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
|
mas01cr@509
|
70 adb_t *adb = data->adb;
|
mas01cr@509
|
71 adb_qstate_internal_t *qstate = data->qstate;
|
mas01mc@534
|
72 uint32_t trackID = audiodb_index_to_track_id(adb, pointID);
|
mas01mc@534
|
73 uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID);
|
mas01cr@509
|
74 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
|
mas01cr@509
|
75 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
|
mas01cr@509
|
76 PointPair p(trackID, qpos, spos);
|
mas01cr@509
|
77 qstate->exact_evaluation_queue->push(p);
|
mas01cr@509
|
78 }
|
mas01cr@509
|
79 }
|
mas01cr@509
|
80
|
mas01cr@509
|
81 // return -1 on error
|
mas01cr@509
|
82 // return 0: if index does not exist
|
mas01cr@509
|
83 // return nqv: if index exists
|
mas01cr@509
|
84 int audiodb_index_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
|
mas01mc@534
|
85 if(adb->header->flags>>28)
|
mas01mc@534
|
86 cerr << "WARNING: Database created using deprecated LSH_N_POINT_BITS coding: REBUILD INDEXES..." << endl;
|
mas01mc@534
|
87
|
mas01cr@509
|
88 double *query = 0, *query_data = 0;
|
mas01cr@509
|
89 adb_qpointers_internal_t qpointers = {0};
|
mas01cr@509
|
90
|
mas01cr@509
|
91 adb_qcallback_t callback_data;
|
mas01cr@509
|
92 callback_data.adb = adb;
|
mas01cr@509
|
93 callback_data.qstate = qstate;
|
mas01cr@509
|
94
|
mas01cr@509
|
95 void (*add_point_func)(void *, uint32_t, uint32_t, float);
|
mas01cr@509
|
96
|
mas01cr@509
|
97 uint32_t sequence_length = spec->qid.sequence_length;
|
mas01cr@509
|
98 bool normalized = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED);
|
mas01cr@509
|
99 double radius = spec->refine.radius;
|
mas01cr@509
|
100 bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD;
|
mas01cr@509
|
101 double absolute_threshold = spec->refine.absolute_threshold;
|
mas01cr@509
|
102
|
mas01cr@610
|
103 qstate->set = new std::set< adb_result_t, adb_result_triple_lt >;
|
mas01cr@610
|
104
|
mas01cr@509
|
105 if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) {
|
mas01cr@509
|
106 add_point_func = &audiodb_index_add_point_approximate;
|
mas01cr@509
|
107 } else {
|
mas01cr@509
|
108 qstate->exact_evaluation_queue = new std::priority_queue<PointPair>;
|
mas01cr@509
|
109 add_point_func = &audiodb_index_add_point_exact;
|
mas01cr@509
|
110 }
|
mas01cr@509
|
111
|
mas01cr@509
|
112 /* FIXME: this hardwired lsh_in_core is here to allow for a
|
mas01cr@509
|
113 * transition period while the need for the argument is worked
|
mas01cr@509
|
114 * through. Hopefully it will disappear again eventually. */
|
mas01cr@509
|
115 bool lsh_in_core = true;
|
mas01cr@509
|
116
|
mas01cr@509
|
117 if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) {
|
mas01cr@509
|
118 return 0;
|
mas01cr@509
|
119 }
|
mas01cr@509
|
120
|
mas01cr@509
|
121 char *database = audiodb_index_get_name(adb->path, radius, sequence_length);
|
mas01cr@509
|
122 if(!database) {
|
mas01cr@509
|
123 return -1;
|
mas01cr@509
|
124 }
|
mas01cr@509
|
125
|
mas01cr@509
|
126 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
|
mas01cr@509
|
127 delete [] database;
|
mas01cr@509
|
128 return -1;
|
mas01cr@509
|
129 }
|
mas01cr@509
|
130
|
mas01mc@534
|
131 uint32_t Nq = qpointers.nvectors - sequence_length + 1;
|
mas01cr@509
|
132 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length);
|
mas01cr@509
|
133
|
mas01cr@509
|
134 // Construct shingles from query features
|
mas01cr@509
|
135 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
|
mas01cr@509
|
136 audiodb_index_make_shingle(vv, pointID, query, adb->header->dim, sequence_length);
|
mas01cr@509
|
137 }
|
mas01cr@509
|
138
|
mas01cr@509
|
139 // Normalize query vectors
|
mas01cr@509
|
140 int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, adb->header->dim, sequence_length, radius, normalized, use_absolute_threshold, absolute_threshold);
|
mas01cr@509
|
141 if(vcount == -1) {
|
mas01cr@509
|
142 audiodb_index_delete_shingles(vv);
|
mas01cr@509
|
143 delete [] database;
|
mas01cr@509
|
144 return -1;
|
mas01cr@509
|
145 }
|
mas01cr@509
|
146 uint32_t numVecsAboveThreshold = vcount;
|
mas01cr@509
|
147
|
mas01cr@509
|
148 // Nq contains number of inspected points in query file,
|
mas01cr@509
|
149 // numVecsAboveThreshold is number of points with power >= absolute_threshold
|
mas01cr@509
|
150 double *qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
|
mas01cr@509
|
151 if(!(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) && numVecsAboveThreshold) {
|
mas01cr@509
|
152 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
|
mas01cr@509
|
153 qstate->lsh->retrieve_point((*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
|
mas01cr@509
|
154 } else {
|
mas01cr@509
|
155 qstate->lsh->serial_retrieve_point(database, (*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data);
|
mas01cr@509
|
156 }
|
mas01cr@509
|
157 } else if(numVecsAboveThreshold) {
|
mas01cr@509
|
158 for(uint32_t pointID = 0; pointID < Nq; pointID++) {
|
mas01cr@509
|
159 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
|
mas01cr@509
|
160 if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
|
mas01cr@509
|
161 qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
|
mas01cr@509
|
162 } else {
|
mas01cr@509
|
163 qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);
|
mas01cr@509
|
164 }
|
mas01cr@509
|
165 }
|
mas01cr@509
|
166 }
|
mas01cr@509
|
167 }
|
mas01cr@509
|
168 audiodb_index_delete_shingles(vv);
|
mas01cr@509
|
169
|
mas01cr@509
|
170 if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) {
|
mas01cr@509
|
171 audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers);
|
mas01cr@509
|
172 }
|
mas01cr@610
|
173
|
mas01cr@610
|
174 delete qstate->set;
|
mas01cr@610
|
175
|
mas01cr@509
|
176
|
mas01cr@509
|
177 // Clean up
|
mas01cr@509
|
178 if(query_data)
|
mas01cr@509
|
179 delete[] query_data;
|
mas01cr@509
|
180 if(qpointers.l2norm_data)
|
mas01cr@509
|
181 delete[] qpointers.l2norm_data;
|
mas01cr@509
|
182 if(qpointers.power_data)
|
mas01cr@509
|
183 delete[] qpointers.power_data;
|
mas01cr@509
|
184 if(qpointers.mean_duration)
|
mas01cr@509
|
185 delete[] qpointers.mean_duration;
|
mas01cr@509
|
186 if(database)
|
mas01cr@509
|
187 delete[] database;
|
mas01cr@509
|
188 if(qstate->lsh != adb->cached_lsh)
|
mas01cr@509
|
189 delete qstate->lsh;
|
mas01cr@509
|
190
|
mas01cr@509
|
191 return Nq;
|
mas01cr@509
|
192 }
|