comparison query.cpp @ 498:342822c2d49a

Merge api-inversion branch (-r656:771, but I don't expect to return to that branch) into the trunk. I expect there to be minor performance regressions (e.g. in the SOAP server index cacheing, which I have forcibly removed) and minor unplugged memory leaks (e.g. in audioDB::query(), where I don't free up the datum). I hope that these leaks and performance regressions can be plugged in short order. I also expect that some (but maybe not all) of the issues currently addressed in the memory-leaks branch are superseded or fixed by this merge. There remains much work to be done; go forth and do it.
author mas01cr
date Sat, 10 Jan 2009 16:47:57 +0000
parents fd890d2b38da
children cc2b97d020b1
comparison
equal deleted inserted replaced
476:a7193678280b 498:342822c2d49a
1 #include "audioDB.h" 1 #include "audioDB.h"
2 #include "reporter.h" 2 #include "audioDB-internals.h"
3 3 #include "accumulators.h"
4 bool audioDB::powers_acceptable(double p1, double p2) { 4
5 if (use_absolute_threshold) { 5 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) {
6 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { 6 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
7 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
7 return false; 8 return false;
8 } 9 }
9 } 10 }
10 if (use_relative_threshold) { 11 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
11 if (fabs(p1-p2) > fabs(relative_threshold)) { 12 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
12 return false; 13 return false;
13 } 14 }
14 } 15 }
15 return true; 16 return true;
16 } 17 }
17 18
18 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { 19 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) {
19 // init database tables and dbH first 20 adb_qstate_internal_t qstate = {0};
20 if(query_from_key) 21 qstate.allowed_keys = new std::set<std::string>;
21 initTables(dbName); 22 adb_query_results_t *results;
22 else 23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
23 initTables(dbName, inFile); 24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
24 25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
25 // keyKeyPos requires dbH to be initialized 26 }
26 if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND)) 27 } else {
27 error("Query key not found :",key); 28 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
28 29 qstate.allowed_keys->insert((*adb->keys)[k]);
29 switch (queryType) { 30 }
30 case O2_POINT_QUERY: 31 }
31 sequenceLength = 1; 32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
32 normalizedDistance = false; 33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
33 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); 34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
35 }
36 }
37
38 switch(qspec->params.distance) {
39 case ADB_DISTANCE_DOT_PRODUCT:
40 switch(qspec->params.accumulation) {
41 case ADB_ACCUMULATION_DB:
42 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
43 break;
44 case ADB_ACCUMULATION_PER_TRACK:
45 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
46 break;
47 case ADB_ACCUMULATION_ONE_TO_ONE:
48 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
49 break;
50 default:
51 goto error;
52 }
34 break; 53 break;
35 case O2_TRACK_QUERY: 54 case ADB_DISTANCE_EUCLIDEAN_NORMED:
36 sequenceLength = 1; 55 case ADB_DISTANCE_EUCLIDEAN:
37 normalizedDistance = false; 56 switch(qspec->params.accumulation) {
38 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles); 57 case ADB_ACCUMULATION_DB:
39 break; 58 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
40 case O2_SEQUENCE_QUERY: 59 break;
41 if(no_unit_norming) 60 case ADB_ACCUMULATION_PER_TRACK:
42 normalizedDistance = false; 61 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
43 if(radius == 0) { 62 break;
44 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); 63 case ADB_ACCUMULATION_ONE_TO_ONE:
45 } else { 64 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
46 if(index_exists(dbName, radius, sequenceLength)){ 65 break;
47 char* indexName = index_get_name(dbName, radius, sequenceLength); 66 default:
48 lsh = index_allocate(indexName, false); 67 goto error;
49 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
50 delete[] indexName;
51 }
52 else
53 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
54 }
55 break;
56 case O2_N_SEQUENCE_QUERY:
57 if(no_unit_norming)
58 normalizedDistance = false;
59 if(radius == 0) {
60 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
61 } else {
62 if(index_exists(dbName, radius, sequenceLength)){
63 char* indexName = index_get_name(dbName, radius, sequenceLength);
64 lsh = index_allocate(indexName, false);
65 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
66 delete[] indexName;
67 }
68 else
69 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
70 }
71 break;
72 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
73 if(no_unit_norming)
74 normalizedDistance = false;
75 if(radius == 0) {
76 error("query-type not yet supported");
77 }
78 else {
79 if(index_exists(dbName, radius, sequenceLength)){
80 char* indexName = index_get_name(dbName, radius, sequenceLength);
81 lsh = index_allocate(indexName, false);
82 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
83 delete[] indexName;
84 }
85 else
86 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
87 } 68 }
88 break; 69 break;
89 default: 70 default:
90 error("unrecognized queryType in query()"); 71 goto error;
91 } 72 }
92 73
93 // Test for index (again) here 74 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
94 if(radius && index_exists(dbName, radius, sequenceLength)){ 75 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
95 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength); 76 goto error;
96 index_query_loop(dbName, query_from_key_index); 77 }
97 } 78 } else {
98 else{ 79 if(audiodb_query_loop(adb, qspec, &qstate)) {
99 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName); 80 goto error;
100 query_loop(dbName, query_from_key_index); 81 }
101 } 82 }
102 83
103 reporter->report(fileTable, adbQueryResponse); 84 results = qstate.accumulator->get_points();
104 } 85
105 86 delete qstate.accumulator;
106 // return ordinal position of key in keyTable 87 delete qstate.allowed_keys;
107 // this should really be a STL hash map search 88
108 unsigned audioDB::getKeyPos(char* key){ 89 return results;
109 if(!dbH) 90
110 error("dbH not initialized","getKeyPos"); 91 error:
111 for(unsigned k=0; k<dbH->numFiles; k++) 92 if(qstate.accumulator)
112 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0) 93 delete qstate.accumulator;
113 return k; 94 if(qstate.allowed_keys)
114 error("Key not found",key); 95 delete qstate.allowed_keys;
115 return O2_ERR_KEYNOTFOUND; 96 return NULL;
116 } 97 }
117 98
118 // This is a common pattern in sequence queries: what we are doing is 99 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) {
119 // taking a window of length seqlen over a buffer of length length, 100 free(rs->results);
120 // and placing the sum of the elements in that window in the first 101 free(rs);
121 // element of the window: thus replacing all but the last seqlen 102 return 0;
122 // elements in the buffer with the corresponding windowed sum. 103 }
123 void audioDB::sequence_sum(double *buffer, int length, int seqlen) { 104
124 double tmp1, tmp2, *ps; 105 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
125 int j, w;
126
127 tmp1 = *buffer;
128 j = 1;
129 w = seqlen - 1;
130 while(w--) {
131 *buffer += buffer[j++];
132 }
133 ps = buffer + 1;
134 w = length - seqlen; // +1 - 1
135 while(w--) {
136 tmp2 = *ps;
137 if(isfinite(tmp1)) {
138 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
139 } else {
140 for(int i = 1; i < seqlen; i++) {
141 *ps += *(ps + i);
142 }
143 }
144 tmp1 = tmp2;
145 ps++;
146 }
147 }
148
149 // In contrast to sequence_sum() above, sequence_sqrt() and
150 // sequence_average() below are simple mappers across the sequence.
151 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
152 int w = length - seqlen + 1;
153 while(w--) {
154 *buffer = sqrt(*buffer);
155 buffer++;
156 }
157 }
158
159 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
160 int w = length - seqlen + 1;
161 while(w--) {
162 *buffer /= seqlen;
163 buffer++;
164 }
165 }
166
167 void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
168 unsigned int j, k, l, w; 106 unsigned int j, k, l, w;
169 double *dp, *qp, *sp; 107 double *dp, *qp, *sp;
170 108
171 const unsigned HOP_SIZE = sequenceHop; 109 const unsigned HOP_SIZE = spec->refine.hopsize;
172 const unsigned wL = sequenceLength; 110 const unsigned wL = spec->qid.sequence_length;
173 111
174 for(j = 0; j < numVectors; j++) { 112 for(j = 0; j < numVectors; j++) {
175 // Sum products matrix 113 // Sum products matrix
176 D[j] = new double[trackTable[track]]; 114 D[j] = new double[(*adb->track_lengths)[track]];
177 assert(D[j]); 115 assert(D[j]);
178 // Matched filter matrix 116 // Matched filter matrix
179 DD[j]=new double[trackTable[track]]; 117 DD[j]=new double[(*adb->track_lengths)[track]];
180 assert(DD[j]); 118 assert(DD[j]);
181 } 119 }
182 120
183 // Dot product 121 // Dot product
184 for(j = 0; j < numVectors; j++) 122 for(j = 0; j < numVectors; j++)
185 for(k = 0; k < trackTable[track]; k++){ 123 for(k = 0; k < (*adb->track_lengths)[track]; k++){
186 qp = query + j * dbH->dim; 124 qp = query + j * adb->header->dim;
187 sp = data_buffer + k * dbH->dim; 125 sp = data_buffer + k * adb->header->dim;
188 DD[j][k] = 0.0; // Initialize matched filter array 126 DD[j][k] = 0.0; // Initialize matched filter array
189 dp = &D[j][k]; // point to correlation cell j,k 127 dp = &D[j][k]; // point to correlation cell j,k
190 *dp = 0.0; // initialize correlation cell 128 *dp = 0.0; // initialize correlation cell
191 l = dbH->dim; // size of vectors 129 l = adb->header->dim; // size of vectors
192 while(l--) 130 while(l--)
193 *dp += *qp++ * *sp++; 131 *dp += *qp++ * *sp++;
194 } 132 }
195 133
196 // Matched Filter 134 // Matched Filter
199 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop 137 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
200 for(w = 0; w < wL; w++) { 138 for(w = 0; w < wL; w++) {
201 for(j = 0; j < numVectors - w; j++) { 139 for(j = 0; j < numVectors - w; j++) {
202 sp = DD[j]; 140 sp = DD[j];
203 spd = D[j+w] + w; 141 spd = D[j+w] + w;
204 k = trackTable[track] - w; 142 k = (*adb->track_lengths)[track] - w;
205 while(k--) 143 while(k--)
206 *sp++ += *spd++; 144 *sp++ += *spd++;
207 } 145 }
208 } 146 }
209 } else { // HOP_SIZE != 1 147 } else { // HOP_SIZE != 1
210 for(w = 0; w < wL; w++) { 148 for(w = 0; w < wL; w++) {
211 for(j = 0; j < numVectors - w; j += HOP_SIZE) { 149 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
212 sp = DD[j]; 150 sp = DD[j];
213 spd = D[j+w]+w; 151 spd = D[j+w]+w;
214 for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) { 152 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
215 *sp += *spd; 153 *sp += *spd;
216 sp += HOP_SIZE; 154 sp += HOP_SIZE;
217 spd += HOP_SIZE; 155 spd += HOP_SIZE;
218 } 156 }
219 } 157 }
220 } 158 }
221 } 159 }
222 } 160 }
223 161
224 void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) { 162 static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
225 if(D != NULL) { 163 if(D != NULL) {
226 for(unsigned int j = 0; j < numVectors; j++) { 164 for(unsigned int j = 0; j < numVectors; j++) {
227 delete[] D[j]; 165 delete[] D[j];
228 } 166 }
229 } 167 }
232 delete[] DD[j]; 170 delete[] DD[j];
233 } 171 }
234 } 172 }
235 } 173 }
236 174
237 void audioDB::read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) { 175 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
238 if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) { 176 uint32_t track_length = (*adb->track_lengths)[track];
177 size_t track_size = track_length * sizeof(double) * adb->header->dim;
178 if (track_size > *data_buffer_size_p) {
239 if(*data_buffer_p) { 179 if(*data_buffer_p) {
240 free(*data_buffer_p); 180 free(*data_buffer_p);
241 } 181 }
242 { 182 {
243 *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim; 183 *data_buffer_size_p = track_size;
244 void *tmp = malloc(*data_buffer_size_p); 184 void *tmp = malloc(track_size);
245 if (tmp == NULL) { 185 if (tmp == NULL) {
246 error("error allocating data buffer"); 186 goto error;
247 } 187 }
248 *data_buffer_p = (double *) tmp; 188 *data_buffer_p = (double *) tmp;
249 } 189 }
250 } 190 }
251 191
252 CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim); 192 read_or_goto_error(trkfid, *data_buffer_p, track_size);
253 } 193 return 0;
254 194
255 // These names deserve some unpicking. The names starting with a "q" 195 error:
256 // are pointers to the query, norm and power vectors; the names 196 return 1;
257 // starting with "v" are things that will end up pointing to the 197 }
258 // actual query point's information. -- CSR, 2007-12-05 198
259 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) { 199 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) {
260 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); 200 off_t track_offset = (*adb->track_offsets)[track_id];
261 201 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
262 if(!(dbH->flags & O2_FLAG_L2NORM)) { 202 /* create a reference/insert, then use adb_insert_create_datum() */
263 error("Database must be L2 normed for sequence query","use -L2NORM"); 203 adb_reference_t reference = {0};
264 } 204 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
265 205 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
266 if(*nvp < sequenceLength) { 206 read_or_goto_error(adb->fd, features, MAXSTR);
267 error("Query shorter than requested sequence length", "maybe use -l"); 207 reference.features = features;
268 } 208 if(adb->header->flags & O2_FLAG_POWER) {
209 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
210 read_or_goto_error(adb->fd, power, MAXSTR);
211 reference.power = power;
212 }
213 if(adb->header->flags & O2_FLAG_TIMES) {
214 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
215 read_or_goto_error(adb->fd, times, MAXSTR);
216 reference.times = times;
217 }
218 return audiodb_insert_create_datum(&reference, d);
219 } else {
220 /* initialize from sources of data that we already have */
221 d->nvectors = (*adb->track_lengths)[track_id];
222 d->dim = adb->header->dim;
223 d->key = (*adb->keys)[track_id].c_str();
224 /* read out stuff from the database tables */
225 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double));
226 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
227 read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double));
228 if(adb->header->flags & O2_FLAG_POWER) {
229 d->power = (double *) malloc(d->nvectors * sizeof(double));
230 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET);
231 read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double));
232 }
233 if(adb->header->flags & O2_FLAG_TIMES) {
234 d->times = (double *) malloc(2 * d->nvectors * sizeof(double));
235 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET);
236 read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double));
237 }
238 return 0;
239 }
240 error:
241 audiodb_free_datum(d);
242 return 1;
243 }
244
245 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
246 uint32_t nvectors = d->nvectors;
247
248 qpointers->nvectors = nvectors;
249
250 size_t vector_size = nvectors * sizeof(double) * d->dim;
251 *vector_data = new double[vector_size];
252 memcpy(*vector_data, d->data, vector_size);
253
254 qpointers->l2norm_data = new double[vector_size / d->dim];
255 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
256 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
257 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
258
259 if(d->power) {
260 qpointers->power_data = new double[vector_size / d->dim];
261 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
262 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
263 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
264 }
265
266 if(d->times) {
267 qpointers->mean_duration = new double[1];
268 *qpointers->mean_duration = 0;
269 for(unsigned int k = 0; k < nvectors; k++) {
270 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
271 }
272 *qpointers->mean_duration /= nvectors;
273 }
274
275 *vector = *vector_data;
276 qpointers->l2norm = qpointers->l2norm_data;
277 qpointers->power = qpointers->power_data;
278 return 0;
279 }
280
281 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
282 adb_datum_t *datum;
283 adb_datum_t d = {0};
284 uint32_t sequence_length;
285 uint32_t sequence_start;
286
287 datum = spec->qid.datum;
288 sequence_length = spec->qid.sequence_length;
289 sequence_start = spec->qid.sequence_start;
290
291 if(datum->data) {
292 if(datum->dim != adb->header->dim) {
293 return 1;
294 }
295 /* initialize d, and mark that nothing needs freeing later. */
296 d = *datum;
297 datum = &d;
298 } else if (datum->key) {
299 uint32_t track_id;
300 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
301 return 1;
302 }
303 audiodb_track_id_datum(adb, track_id, &d);
304 } else {
305 return 1;
306 }
307
308 /* FIXME: check the overflow logic here */
309 if(sequence_start + sequence_length > d.nvectors) {
310 if(datum != &d) {
311 audiodb_free_datum(&d);
312 }
313 return 1;
314 }
315
316 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
317
318 /* Finally, if applicable, set up the moving qpointers. */
319 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
320 /* the qpointers are already at the start, and so correct. */
321 } else {
322 /* adjust the qpointers to point to the correct place in the sequence */
323 *vector = *vector_data + spec->qid.sequence_start * d.dim;
324 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
325 if(d.power) {
326 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
327 }
328 qpointers->nvectors = sequence_length;
329 }
330
331 /* Clean up: free any bits of datum that we have ourselves
332 * allocated. */
333 if(datum != &d) {
334 audiodb_free_datum(&d);
335 }
336
337 return 0;
338 }
339
340 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
341 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
342 uint32_t sequence_length = spec->qid.sequence_length;
343
344 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
345 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
346 double *times_table = NULL;
347
348
349 dbpointers->nvectors = nvectors;
350 dbpointers->l2norm_data = new double[nvectors];
351
352 double *snpp = dbpointers->l2norm_data, *sppp = 0;
353 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
354 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
355
356 if (using_power) {
357 if (!(adb->header->flags & O2_FLAG_POWER)) {
358 goto error;
359 }
360 dbpointers->power_data = new double[nvectors];
361 sppp = dbpointers->power_data;
362 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
363 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
364 }
365
366 for(unsigned int i = 0; i < adb->header->numFiles; i++){
367 size_t track_length = (*adb->track_lengths)[i];
368 if(track_length >= sequence_length) {
369 audiodb_sequence_sum(snpp, track_length, sequence_length);
370 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
371 if (using_power) {
372 audiodb_sequence_sum(sppp, track_length, sequence_length);
373 audiodb_sequence_average(sppp, track_length, sequence_length);
374 }
375 }
376 snpp += track_length;
377 if (using_power) {
378 sppp += track_length;
379 }
380 }
381
382 if (using_times) {
383 if(!(adb->header->flags & O2_FLAG_TIMES)) {
384 goto error;
385 }
386
387 dbpointers->mean_duration = new double[adb->header->numFiles];
388
389 times_table = (double *) malloc(2 * nvectors * sizeof(double));
390 if(!times_table) {
391 goto error;
392 }
393 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
394 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
395 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
396 size_t track_length = (*adb->track_lengths)[k];
397 unsigned int j;
398 dbpointers->mean_duration[k] = 0.0;
399 for(j = 0; j < track_length; j++) {
400 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
401 }
402 dbpointers->mean_duration[k] /= j;
403 }
404
405 free(times_table);
406 times_table = NULL;
407 }
408
409 dbpointers->l2norm = dbpointers->l2norm_data;
410 dbpointers->power = dbpointers->power_data;
411 return 0;
412
413 error:
414 if(dbpointers->l2norm_data) {
415 delete [] dbpointers->l2norm_data;
416 }
417 if(dbpointers->power_data) {
418 delete [] dbpointers->power_data;
419 }
420 if(dbpointers->mean_duration) {
421 delete [] dbpointers->mean_duration;
422 }
423 if(times_table) {
424 free(times_table);
425 }
426 return 1;
269 427
270 VERB_LOG(1, "performing norms... "); 428 }
271 429
272 *qp = new double[*nvp * dbH->dim]; 430 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
273 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double)); 431 adb_qpointers_internal_t dbpointers = {0};
274 *qnp = new double[*nvp]; 432
275 unitNorm(*qp, dbH->dim, *nvp, *qnp); 433 uint32_t sequence_length = spec->qid.sequence_length;
276 434 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
277 sequence_sum(*qnp, *nvp, sequenceLength); 435
278 sequence_sqrt(*qnp, *nvp, sequenceLength); 436 if(qstate->exact_evaluation_queue->size() == 0) {
279 437 return 0;
280 if (usingPower) { 438 }
281 *qpp = new double[*nvp]; 439
282 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { 440 /* We are guaranteed that the order of points is sorted by:
283 error("error seeking to data", powerFileName, "lseek"); 441 * {trackID, spos, qpos} so we can be relatively efficient in
284 } 442 * initialization of track data. We assume that points usually
285 int count = read(powerfd, *qpp, *nvp * sizeof(double)); 443 * don't overlap, so we will use exhaustive dot product evaluation
286 if (count == -1) { 444 * (instead of memoization of partial sums, as in query_loop()).
287 error("error reading data", powerFileName, "read"); 445 */
288 }
289 if ((unsigned) count != *nvp * sizeof(double)) {
290 error("short read", powerFileName);
291 }
292
293 sequence_sum(*qpp, *nvp, sequenceLength);
294 sequence_average(*qpp, *nvp, sequenceLength);
295 }
296
297 if (usingTimes) {
298 unsigned int k;
299 *mqdp = 0.0;
300 double *querydurs = new double[*nvp];
301 double *timesdata = new double[*nvp*2];
302 insertTimeStamps(*nvp, timesFile, timesdata);
303 for(k = 0; k < *nvp; k++) {
304 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
305 *mqdp += querydurs[k];
306 }
307 *mqdp /= k;
308
309 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
310
311 delete [] querydurs;
312 delete [] timesdata;
313 }
314
315 // Defaults, for exhaustive search (!usingQueryPoint)
316 *vqp = *qp;
317 *vqnp = *qnp;
318 *vqpp = *qpp;
319
320 if(usingQueryPoint) {
321 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
322 error("queryPoint >= numVectors-sequenceLength+1 in query");
323 } else {
324 VERB_LOG(1, "query point: %u\n", queryPoint);
325 *vqp = *qp + queryPoint * dbH->dim;
326 *vqnp = *qnp + queryPoint;
327 if (usingPower) {
328 *vqpp = *qpp + queryPoint;
329 }
330 *nvp = sequenceLength;
331 }
332 }
333 }
334
335 // Does the same as set_up_query(...) but from database features instead of from a file
336 // Constructs the same outputs as set_up_query
337 void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) {
338 if(!trackTable)
339 error("trackTable not initialized","set_up_query_from_key");
340
341 if(!(dbH->flags & O2_FLAG_L2NORM)) {
342 error("Database must be L2 normed for sequence query","use -L2NORM");
343 }
344
345 if(dbH->flags & O2_FLAG_POWER)
346 usingPower = true;
347
348 if(dbH->flags & O2_FLAG_TIMES)
349 usingTimes = true;
350
351 *nvp = trackTable[queryIndex];
352 if(*nvp < sequenceLength) {
353 error("Query shorter than requested sequence length", "maybe use -l");
354 }
355
356 VERB_LOG(1, "performing norms... ");
357
358 // For LARGE_ADB load query features from file
359 if( dbH->flags & O2_FLAG_LARGE_ADB ){
360 if(infid>0)
361 close(infid);
362 char* prefixedString = new char[O2_MAXFILESTR];
363 char* tmpStr = prefixedString;
364 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
365 prefix_name(&prefixedString, adb_feature_root);
366 if(tmpStr!=prefixedString)
367 delete[] tmpStr;
368 initInputFile(prefixedString, false); // nommap, file pointer at correct position
369 size_t allocatedSize = 0;
370 read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize
371 // Consistency check on allocated memory and query feature size
372 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
373 error("Query memory allocation failed consitency check","set_up_query_from_key");
374 // Allocated and calculate auxillary sequences: l2norm and power
375 init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp);
376 }
377 else{ // Load from self-contained ADB database
378 // Read query feature vectors from database
379 *qp = NULL;
380 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
381 size_t allocatedSize = 0;
382 read_data(dbfid, queryIndex, qp, &allocatedSize);
383 // Consistency check on allocated memory and query feature size
384 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
385 error("Query memory allocation failed consitency check","set_up_query_from_key");
386
387 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
388 // Copy L2 norm partial-sum coefficients
389 assert(*qnp = new double[*nvp]);
390 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double));
391 sequence_sum(*qnp, *nvp, sequenceLength);
392 sequence_sqrt(*qnp, *nvp, sequenceLength);
393
394 if( usingPower ){
395 // Copy Power partial-sum coefficients
396 assert(*qpp = new double[*nvp]);
397 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double));
398 sequence_sum(*qpp, *nvp, sequenceLength);
399 sequence_average(*qpp, *nvp, sequenceLength);
400 }
401
402 if (usingTimes) {
403 unsigned int k;
404 *mqdp = 0.0;
405 double *querydurs = new double[*nvp];
406 double *timesdata = new double[*nvp*2];
407 assert(querydurs && timesdata);
408 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double));
409 for(k = 0; k < *nvp; k++) {
410 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
411 *mqdp += querydurs[k];
412 }
413 *mqdp /= k;
414
415 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
416
417 delete [] querydurs;
418 delete [] timesdata;
419 }
420 }
421
422 // Defaults, for exhaustive search (!usingQueryPoint)
423 *vqp = *qp;
424 *vqnp = *qnp;
425 *vqpp = *qpp;
426
427 if(usingQueryPoint) {
428 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
429 error("queryPoint >= numVectors-sequenceLength+1 in query");
430 } else {
431 VERB_LOG(1, "query point: %u\n", queryPoint);
432 *vqp = *qp + queryPoint * dbH->dim;
433 *vqnp = *qnp + queryPoint;
434 if (usingPower) {
435 *vqpp = *qpp + queryPoint;
436 }
437 *nvp = sequenceLength;
438 }
439 }
440 }
441
442
443 // FIXME: this is not the right name; we're not actually setting up
444 // the database, but copying various bits of it out of mmap()ed tables
445 // in order to reduce seeks.
446 void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) {
447 *dvp = dbH->length / (dbH->dim * sizeof(double));
448 *snp = new double[*dvp];
449
450 double *snpp = *snp, *sppp = 0;
451 memcpy(*snp, l2normTable, *dvp * sizeof(double));
452
453 if (usingPower) {
454 if (!(dbH->flags & O2_FLAG_POWER)) {
455 error("database not power-enabled", dbName);
456 }
457 *spp = new double[*dvp];
458 sppp = *spp;
459 memcpy(*spp, powerTable, *dvp * sizeof(double));
460 }
461
462 for(unsigned int i = 0; i < dbH->numFiles; i++){
463 if(trackTable[i] >= sequenceLength) {
464 sequence_sum(snpp, trackTable[i], sequenceLength);
465 sequence_sqrt(snpp, trackTable[i], sequenceLength);
466
467 if (usingPower) {
468 sequence_sum(sppp, trackTable[i], sequenceLength);
469 sequence_average(sppp, trackTable[i], sequenceLength);
470 }
471 }
472 snpp += trackTable[i];
473 if (usingPower) {
474 sppp += trackTable[i];
475 }
476 }
477
478 if (usingTimes) {
479 if(!(dbH->flags & O2_FLAG_TIMES)) {
480 error("query timestamps provided for non-timed database", dbName);
481 }
482
483 *mddp = new double[dbH->numFiles];
484
485 for(unsigned int k = 0; k < dbH->numFiles; k++) {
486 unsigned int j;
487 (*mddp)[k] = 0.0;
488 for(j = 0; j < trackTable[k]; j++) {
489 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j];
490 }
491 (*mddp)[k] /= j;
492 }
493 }
494
495 *vsnp = *snp;
496 *vspp = *spp;
497 }
498
499 // query_points()
500 //
501 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
502 // and insert result into the current reporter.
503 //
504 // Preconditions:
505 // A query inFile has been opened with setup_query(...) and query pointers initialized
506 // The database contains some points
507 // An exact_evaluation_queue has been allocated and populated
508 // A reporter has been allocated
509 //
510 // Postconditions:
511 // reporter contains the points and distances that meet the reporter constraints
512
513 void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){
514 unsigned int dbVectors;
515 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0;
516 double *meanDBdur = 0;
517
518 // check pre-conditions
519 assert(exact_evaluation_queue&&reporter);
520 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
521 return;
522
523 // Compute database info
524 // FIXME: we more than likely don't need very much of the database
525 // so make a new method to build these values per-track or, even better, per-point
526 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
527 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
528
529 VERB_LOG(1, "matching points...");
530
531 assert(pointNN>0 && pointNN<=O2_MAXNN);
532 assert(trackNN>0 && trackNN<=O2_MAXNN);
533
534 // We are guaranteed that the order of points is sorted by:
535 // trackID, spos, qpos
536 // so we can be relatively efficient in initialization of track data.
537 // Here we assume that points don't overlap, so we will use exhaustive dot
538 // product evaluation instead of memoization of partial sums which is used
539 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
540 double dist; 446 double dist;
541 size_t data_buffer_size = 0; 447 double *dbdata = 0, *dbdata_pointer;
542 double *data_buffer = 0; 448 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
543 Uns32T trackOffset = 0; 449 Uns32T npairs = qstate->exact_evaluation_queue->size();
544 Uns32T trackIndexOffset = 0; 450 while(npairs--) {
545 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range 451 PointPair pp = qstate->exact_evaluation_queue->top();
546 Uns32T npairs = exact_evaluation_queue->size(); 452 if(currentTrack != pp.trackID) {
547 while(npairs--){ 453 SAFE_DELETE_ARRAY(dbdata);
548 PointPair pp = exact_evaluation_queue->top(); 454 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
549 // Large ADB track data must be loaded here for sPower 455 SAFE_DELETE_ARRAY(dbpointers.power_data);
550 if(dbH->flags & O2_FLAG_LARGE_ADB){ 456 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
551 trackOffset=0; 457 currentTrack = pp.trackID;
552 trackIndexOffset=0; 458 adb_datum_t d = {0};
553 if(currentTrack!=pp.trackID){ 459 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
554 char* prefixedString = new char[O2_MAXFILESTR]; 460 delete qstate->exact_evaluation_queue;
555 char* tmpStr = prefixedString; 461 return 1;
556 // On currentTrack change, allocate and load track data 462 }
557 currentTrack=pp.trackID; 463 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
558 SAFE_DELETE_ARRAY(sNorm); 464 delete qstate->exact_evaluation_queue;
559 SAFE_DELETE_ARRAY(sPower); 465 audiodb_free_datum(&d);
560 if(infid>0) 466 return 1;
561 close(infid); 467 }
562 // Open and check dimensions of feature file 468 audiodb_free_datum(&d);
563 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); 469 }
564 prefix_name((char ** const) &prefixedString, adb_feature_root); 470 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
565 if (prefixedString!=tmpStr) 471 Uns32T sPos = pp.spos; // index into l2norm table
566 delete[] tmpStr;
567 initInputFile(prefixedString, false); // nommap, file pointer at correct position
568 // Load the feature vector data for current track into data_buffer
569 read_data(infid, pp.trackID, &data_buffer, &data_buffer_size);
570 // Load power and calculate power and l2norm sequence sums
571 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr);
572 }
573 }
574 else{
575 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
576 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
577 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
578 }
579 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
580 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
581 // Test power thresholds before computing distance 472 // Test power thresholds before computing distance
582 if( ( !usingPower || powers_acceptable(qpPtr[qPos], sPower[sPos])) && 473 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
583 ( qPos<numVectors-sequenceLength+1 && pp.spos<trackTable[pp.trackID]-sequenceLength+1 ) ){ 474 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
584 // Non-large ADB track data is loaded inside power test for efficiency
585 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
586 // On currentTrack change, allocate and load track data
587 currentTrack=pp.trackID;
588 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
589 read_data(dbfid, currentTrack, &data_buffer, &data_buffer_size);
590 }
591 // Compute distance 475 // Compute distance
592 dist = dot_product_points(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength); 476 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
593 double qn = qnPtr[qPos]; 477 double qn = qpointers->l2norm[qPos];
594 double sn = sNorm[sPos]; 478 double sn = dbpointers.l2norm[sPos];
595 if(normalizedDistance) 479 switch(spec->params.distance) {
480 case ADB_DISTANCE_EUCLIDEAN_NORMED:
596 dist = 2 - (2/(qn*sn))*dist; 481 dist = 2 - (2/(qn*sn))*dist;
597 else 482 break;
598 if(no_unit_norming) 483 case ADB_DISTANCE_EUCLIDEAN:
599 dist = qn*qn + sn*sn - 2*dist; 484 dist = qn*qn + sn*sn - 2*dist;
600 // else 485 break;
601 // dist = dist; 486 }
602 if((!radius) || dist <= (radius+O2_DISTANCE_TOLERANCE)) 487 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
603 reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist); 488 dist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
604 } 489 adb_result_t r;
605 exact_evaluation_queue->pop(); 490 r.key = (*adb->keys)[pp.trackID].c_str();
491 r.dist = dist;
492 r.qpos = pp.qpos;
493 r.ipos = pp.spos;
494 qstate->accumulator->add_point(&r);
495 }
496 }
497 qstate->exact_evaluation_queue->pop();
606 } 498 }
607 499
608 500
609 // Cleanup 501 // Cleanup
610 free(data_buffer); 502 SAFE_DELETE_ARRAY(dbdata);
611 SAFE_DELETE_ARRAY(sNorm); 503 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
612 SAFE_DELETE_ARRAY(sPower); 504 SAFE_DELETE_ARRAY(dbpointers.power_data);
613 SAFE_DELETE_ARRAY(meanDBdur); 505 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
614 } 506 delete qstate->exact_evaluation_queue;
615 507 return 0;
616 // A completely unprotected dot-product method 508 }
617 // Caller is responsible for ensuring that memory is within bounds 509
618 inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){ 510 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
619 double dist = 0.0;
620 while(L--)
621 dist += *q++ * *p++;
622 return dist;
623 }
624
625 void audioDB::query_loop(const char* dbName, Uns32T queryIndex) {
626 511
627 unsigned int numVectors;
628 double *query, *query_data; 512 double *query, *query_data;
629 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; 513 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
630 double meanQdur; 514
631 515 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
632 if( dbH->flags & O2_FLAG_LARGE_ADB ) 516
633 error("error: LARGE_ADB requires indexed query"); 517 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
634 518 /* FIXME: actually it would be nice to support this mode of
635 if(query_from_key) 519 * operation, but for now... */
636 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex); 520 return 1;
637 else 521 }
638 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); 522
639 523 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
640 unsigned int dbVectors; 524 return 1;
641 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; 525 }
642 double *meanDBdur = 0; 526
643 527 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
644 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors); 528 return 1;
645 529 }
646 VERB_LOG(1, "matching tracks..."); 530
647 531 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
648 assert(pointNN>0 && pointNN<=O2_MAXNN); 532 unsigned wL = spec->qid.sequence_length;
649 assert(trackNN>0 && trackNN<=O2_MAXNN);
650
651 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
652 double **D = 0; // Differences query and target 533 double **D = 0; // Differences query and target
653 double **DD = 0; // Matched filter distance 534 double **DD = 0; // Matched filter distance
654 535
655 D = new double*[numVectors]; // pre-allocate 536 D = new double*[qpointers.nvectors]; // pre-allocate
656 DD = new double*[numVectors]; 537 DD = new double*[qpointers.nvectors];
657 538
658 gettimeofday(&tv1, NULL);
659 unsigned processedTracks = 0;
660 off_t trackIndexOffset; 539 off_t trackIndexOffset;
661 char nextKey[MAXSTR];
662 540
663 // Track loop 541 // Track loop
664 size_t data_buffer_size = 0; 542 size_t data_buffer_size = 0;
665 double *data_buffer = 0; 543 double *data_buffer = 0;
666 lseek(dbfid, dbH->dataOffset, SEEK_SET); 544 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
667 545
668 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { 546 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
669 547 for(track = 0; track < adb->header->numFiles; track++) {
670 trackOffset = trackOffsetTable[track]; // numDoubles offset 548 unsigned t = track;
671 549
672 // get trackID from file if using a control file 550 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
673 if(trackFile) { 551 track++;
674 trackFile->getline(nextKey,MAXSTR); 552 if(track == adb->header->numFiles) {
675 if(!trackFile->eof()) { 553 goto loop_finish;
676 track = getKeyPos(nextKey); 554 }
677 trackOffset = trackOffsetTable[track]; 555 }
678 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); 556 trackOffset = (*adb->track_offsets)[track];
679 } else { 557 if(track != t) {
680 break; 558 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
681 } 559 }
682 } 560 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
683 561
684 // skip identity on query_from_key 562 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
685 if( query_from_key && (track == queryIndex) ) { 563 return 1;
686 if(queryIndex!=dbH->numFiles-1){ 564 }
687 track++; 565 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
688 trackOffset = trackOffsetTable[track];
689 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
690 }
691 else{
692 break;
693 }
694 }
695
696 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
697
698 read_data(dbfid, track, &data_buffer, &data_buffer_size);
699 if(sequenceLength <= trackTable[track]) { // test for short sequences
700 566
701 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); 567 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
702 568
703 initialize_arrays(track, numVectors, query, data_buffer, D, DD); 569 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
704 570 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
705 if(usingTimes) {
706 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]);
707 }
708
709 if((!usingTimes) || fabs(meanDBdur[track]-meanQdur) < meanQdur*timesTol) {
710 if(usingTimes) {
711 VERB_LOG(3,"within duration tolerance.\n");
712 }
713 571
714 // Search for minimum distance by shingles (concatenated vectors) 572 // Search for minimum distance by shingles (concatenated vectors)
715 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { 573 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
716 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { 574 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
717 double thisDist; 575 double thisDist = 0;
718 if(normalizedDistance) 576 double qn = qpointers.l2norm[j];
719 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; 577 double sn = dbpointers.l2norm[trackIndexOffset + k];
720 else 578 switch(spec->params.distance) {
721 if(no_unit_norming) 579 case ADB_DISTANCE_EUCLIDEAN_NORMED:
722 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; 580 thisDist = 2-(2/(qn*sn))*DD[j][k];
723 else 581 break;
724 thisDist = DD[j][k]; 582 case ADB_DISTANCE_EUCLIDEAN:
725 583 thisDist = qn*qn + sn*sn - 2*DD[j][k];
584 break;
585 case ADB_DISTANCE_DOT_PRODUCT:
586 thisDist = DD[j][k];
587 break;
588 }
726 // Power test 589 // Power test
727 if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) { 590 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
728 // radius test 591 // radius test
729 if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) { 592 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
730 reporter->add_point(track, usingQueryPoint ? queryPoint : j, k, thisDist); 593 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
594 adb_result_t r;
595 r.key = (*adb->keys)[track].c_str();
596 r.dist = thisDist;
597 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
598 r.qpos = j;
599 } else {
600 r.qpos = spec->qid.sequence_start;
601 }
602 r.ipos = k;
603 qstate->accumulator->add_point(&r);
731 } 604 }
732 } 605 }
733 } 606 }
734 } 607 }
735 } // Duration match 608 } // Duration match
736 delete_arrays(track, numVectors, D, DD); 609 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
737 } 610 }
738 } 611 }
612
613 loop_finish:
739 614
740 free(data_buffer); 615 free(data_buffer);
741
742 gettimeofday(&tv2,NULL);
743 VERB_LOG(1,"elapsed time: %ld msec\n",
744 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
745 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
746 616
747 // Clean up 617 // Clean up
748 if(query_data) 618 if(query_data)
749 delete[] query_data; 619 delete[] query_data;
750 if(qNorm) 620 if(qpointers.l2norm_data)
751 delete[] qNorm; 621 delete[] qpointers.l2norm_data;
752 if(sNorm) 622 if(qpointers.power_data)
753 delete[] sNorm; 623 delete[] qpointers.power_data;
754 if(qPower) 624 if(qpointers.mean_duration)
755 delete[] qPower; 625 delete[] qpointers.mean_duration;
756 if(sPower) 626 if(dbpointers.power_data)
757 delete[] sPower; 627 delete[] dbpointers.power_data;
628 if(dbpointers.l2norm_data)
629 delete[] dbpointers.l2norm_data;
758 if(D) 630 if(D)
759 delete[] D; 631 delete[] D;
760 if(DD) 632 if(DD)
761 delete[] DD; 633 delete[] DD;
762 if(meanDBdur) 634 if(dbpointers.mean_duration)
763 delete[] meanDBdur; 635 delete[] dbpointers.mean_duration;
764 } 636
765 637 return 0;
766 // Unit norm block of features 638 }
767 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
768 unsigned d;
769 double L2, *p;
770
771 VERB_LOG(2, "norming %u vectors...", n);
772 while(n--) {
773 p = X;
774 L2 = 0.0;
775 d = dim;
776 while(d--) {
777 L2 += *p * *p;
778 p++;
779 }
780 if(qNorm) {
781 *qNorm++=L2;
782 }
783 X += dim;
784 }
785 VERB_LOG(2, "done.\n");
786 }
787
788