annotate query.cpp @ 509:cc2b97d020b1

Code rearrangements to tease apart library code from C++ audioDB code. There should be precisely no functional changes in this commit. Instead, the only thing that has happened is that all the abstraction violation and other horribleness is concentrated in one place: the include of "audioDB-internals.h" in audioDB.h -- the separation will be complete once that include can be removed. This include is necessary because the command-line binary / SOAP server still does some things directly rather than through an API: not least the operations that have not yet been integrated into the API, but also some messing around with constants, flags and nominally internal functions. The intent is to remove as many of these as possible and think quite hard about the rest. In the meantime, the library is now much more self-contained: the only things it uses are in the audioDB_API.h and audioDB-internals.h headers; thus there are fewer nasty surprises lurking for readers of the code. The Makefile has been adjusted to take advantage of this rearrangement in the dependencies.
author mas01cr
date Thu, 15 Jan 2009 13:57:33 +0000
parents 342822c2d49a
children 7ee6a2701d90 633614461994
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@498 5 #include "accumulators.h"
mas01cr@239 6
mas01cr@498 7 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) {
mas01cr@498 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@498 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@498 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@498 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
mas01cr@498 21 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) {
mas01cr@498 22 adb_qstate_internal_t qstate = {0};
mas01cr@498 23 qstate.allowed_keys = new std::set<std::string>;
mas01cr@498 24 adb_query_results_t *results;
mas01cr@498 25 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@498 26 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@498 27 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@498 28 }
mas01cr@498 29 } else {
mas01cr@498 30 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 31 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@498 32 }
mas01cr@498 33 }
mas01cr@498 34 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@498 35 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@498 36 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@498 37 }
mas01cr@498 38 }
mas01mc@292 39
mas01cr@498 40 switch(qspec->params.distance) {
mas01cr@498 41 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 42 switch(qspec->params.accumulation) {
mas01cr@498 43 case ADB_ACCUMULATION_DB:
mas01cr@498 44 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@498 45 break;
mas01cr@498 46 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 47 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 48 break;
mas01cr@498 49 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 50 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@498 51 break;
mas01cr@498 52 default:
mas01cr@498 53 goto error;
mas01cr@239 54 }
mas01cr@239 55 break;
mas01cr@498 56 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 57 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 58 switch(qspec->params.accumulation) {
mas01cr@498 59 case ADB_ACCUMULATION_DB:
mas01cr@498 60 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@498 61 break;
mas01cr@498 62 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 63 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 64 break;
mas01cr@498 65 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 66 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@498 67 break;
mas01cr@498 68 default:
mas01cr@498 69 goto error;
mas01mc@263 70 }
mas01mc@263 71 break;
mas01cr@239 72 default:
mas01cr@498 73 goto error;
mas01mc@329 74 }
mas01cr@498 75
mas01cr@498 76 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
mas01cr@498 77 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
mas01cr@498 78 goto error;
mas01cr@498 79 }
mas01cr@498 80 } else {
mas01cr@498 81 if(audiodb_query_loop(adb, qspec, &qstate)) {
mas01cr@498 82 goto error;
mas01cr@498 83 }
mas01mc@329 84 }
mas01mc@292 85
mas01cr@498 86 results = qstate.accumulator->get_points();
mas01cr@498 87
mas01cr@498 88 delete qstate.accumulator;
mas01cr@498 89 delete qstate.allowed_keys;
mas01cr@498 90
mas01cr@498 91 return results;
mas01cr@498 92
mas01cr@498 93 error:
mas01cr@498 94 if(qstate.accumulator)
mas01cr@498 95 delete qstate.accumulator;
mas01cr@498 96 if(qstate.allowed_keys)
mas01cr@498 97 delete qstate.allowed_keys;
mas01cr@498 98 return NULL;
mas01cr@239 99 }
mas01cr@239 100
mas01cr@498 101 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@498 102 free(rs->results);
mas01cr@498 103 free(rs);
mas01cr@498 104 return 0;
mas01cr@239 105 }
mas01cr@239 106
mas01cr@498 107 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 108 unsigned int j, k, l, w;
mas01cr@239 109 double *dp, *qp, *sp;
mas01cr@239 110
mas01cr@498 111 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@498 112 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 113
mas01cr@239 114 for(j = 0; j < numVectors; j++) {
mas01cr@239 115 // Sum products matrix
mas01cr@498 116 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 117 assert(D[j]);
mas01cr@239 118 // Matched filter matrix
mas01cr@498 119 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 120 assert(DD[j]);
mas01cr@239 121 }
mas01cr@239 122
mas01cr@239 123 // Dot product
mas01cr@239 124 for(j = 0; j < numVectors; j++)
mas01cr@498 125 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@498 126 qp = query + j * adb->header->dim;
mas01cr@498 127 sp = data_buffer + k * adb->header->dim;
mas01cr@239 128 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 129 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 130 *dp = 0.0; // initialize correlation cell
mas01cr@498 131 l = adb->header->dim; // size of vectors
mas01cr@239 132 while(l--)
mas01cr@239 133 *dp += *qp++ * *sp++;
mas01cr@239 134 }
mas01cr@239 135
mas01cr@239 136 // Matched Filter
mas01cr@239 137 // HOP SIZE == 1
mas01cr@239 138 double* spd;
mas01cr@239 139 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 140 for(w = 0; w < wL; w++) {
mas01cr@239 141 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 142 sp = DD[j];
mas01cr@239 143 spd = D[j+w] + w;
mas01cr@498 144 k = (*adb->track_lengths)[track] - w;
mas01mc@292 145 while(k--)
mas01mc@292 146 *sp++ += *spd++;
mas01cr@239 147 }
mas01cr@239 148 }
mas01cr@239 149 } else { // HOP_SIZE != 1
mas01cr@239 150 for(w = 0; w < wL; w++) {
mas01cr@239 151 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 152 sp = DD[j];
mas01cr@239 153 spd = D[j+w]+w;
mas01cr@498 154 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 155 *sp += *spd;
mas01cr@239 156 sp += HOP_SIZE;
mas01cr@239 157 spd += HOP_SIZE;
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160 }
mas01cr@239 161 }
mas01cr@239 162 }
mas01cr@239 163
/*
 * Release the rows of the two work matrices.
 *
 * Each of D and DD, when non-NULL, is treated as an array of
 * numVectors row pointers, and every row is released with delete[].
 * The arrays of row pointers themselves are left untouched.  The
 * track argument is not used.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  double **matrices[2] = { D, DD };
  for(unsigned int m = 0; m < 2; m++) {
    double **rows = matrices[m];
    if(rows == NULL) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 176
mas01cr@498 177 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@498 178 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@498 179 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@498 180 if (track_size > *data_buffer_size_p) {
mas01cr@239 181 if(*data_buffer_p) {
mas01cr@239 182 free(*data_buffer_p);
mas01cr@239 183 }
mas01cr@239 184 {
mas01cr@498 185 *data_buffer_size_p = track_size;
mas01cr@498 186 void *tmp = malloc(track_size);
mas01cr@239 187 if (tmp == NULL) {
mas01cr@498 188 goto error;
mas01cr@239 189 }
mas01cr@239 190 *data_buffer_p = (double *) tmp;
mas01cr@239 191 }
mas01cr@239 192 }
mas01cr@239 193
mas01cr@498 194 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@498 195 return 0;
mas01cr@498 196
mas01cr@498 197 error:
mas01cr@498 198 return 1;
mas01cr@239 199 }
mas01cr@239 200
mas01cr@498 201 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) {
mas01cr@498 202 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@509 203 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 204 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@498 205 adb_reference_t reference = {0};
mas01cr@509 206 char features[ADB_MAXSTR], power[ADB_MAXSTR], times[ADB_MAXSTR];
mas01cr@509 207 lseek(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 208 read_or_goto_error(adb->fd, features, ADB_MAXSTR);
mas01cr@498 209 reference.features = features;
mas01cr@509 210 if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
mas01cr@509 211 lseek(adb->fd, adb->header->powerTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 212 read_or_goto_error(adb->fd, power, ADB_MAXSTR);
mas01cr@498 213 reference.power = power;
mas01cr@498 214 }
mas01cr@509 215 if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
mas01cr@509 216 lseek(adb->fd, adb->header->timesTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 217 read_or_goto_error(adb->fd, times, ADB_MAXSTR);
mas01cr@498 218 reference.times = times;
mas01cr@498 219 }
mas01cr@498 220 return audiodb_insert_create_datum(&reference, d);
mas01cr@498 221 } else {
mas01cr@498 222 /* initialize from sources of data that we already have */
mas01cr@498 223 d->nvectors = (*adb->track_lengths)[track_id];
mas01cr@498 224 d->dim = adb->header->dim;
mas01cr@498 225 d->key = (*adb->keys)[track_id].c_str();
mas01cr@498 226 /* read out stuff from the database tables */
mas01cr@498 227 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double));
mas01cr@498 228 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@498 229 read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double));
mas01cr@509 230 if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
mas01cr@498 231 d->power = (double *) malloc(d->nvectors * sizeof(double));
mas01cr@498 232 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@498 233 read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double));
mas01cr@498 234 }
mas01cr@509 235 if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
mas01cr@498 236 d->times = (double *) malloc(2 * d->nvectors * sizeof(double));
mas01cr@498 237 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@498 238 read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double));
mas01cr@498 239 }
mas01cr@498 240 return 0;
mas01cr@498 241 }
mas01cr@498 242 error:
mas01cr@498 243 audiodb_free_datum(d);
mas01cr@498 244 return 1;
mas01cr@498 245 }
mas01mc@292 246
mas01cr@498 247 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 248 uint32_t nvectors = d->nvectors;
mas01cr@498 249
mas01cr@498 250 qpointers->nvectors = nvectors;
mas01cr@498 251
mas01cr@498 252 size_t vector_size = nvectors * sizeof(double) * d->dim;
mas01cr@498 253 *vector_data = new double[vector_size];
mas01cr@498 254 memcpy(*vector_data, d->data, vector_size);
mas01cr@498 255
mas01cr@498 256 qpointers->l2norm_data = new double[vector_size / d->dim];
mas01cr@498 257 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
mas01cr@498 258 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 259 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 260
mas01cr@498 261 if(d->power) {
mas01cr@498 262 qpointers->power_data = new double[vector_size / d->dim];
mas01cr@498 263 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
mas01cr@498 264 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@498 265 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@239 266 }
mas01cr@239 267
mas01cr@498 268 if(d->times) {
mas01cr@498 269 qpointers->mean_duration = new double[1];
mas01cr@498 270 *qpointers->mean_duration = 0;
mas01cr@498 271 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@498 272 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01cr@239 273 }
mas01cr@498 274 *qpointers->mean_duration /= nvectors;
mas01cr@239 275 }
mas01cr@239 276
mas01cr@498 277 *vector = *vector_data;
mas01cr@498 278 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@498 279 qpointers->power = qpointers->power_data;
mas01cr@498 280 return 0;
mas01cr@498 281 }
mas01cr@498 282
mas01cr@498 283 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 284 adb_datum_t *datum;
mas01cr@498 285 adb_datum_t d = {0};
mas01cr@498 286 uint32_t sequence_length;
mas01cr@498 287 uint32_t sequence_start;
mas01cr@498 288
mas01cr@498 289 datum = spec->qid.datum;
mas01cr@498 290 sequence_length = spec->qid.sequence_length;
mas01cr@498 291 sequence_start = spec->qid.sequence_start;
mas01cr@498 292
mas01cr@498 293 if(datum->data) {
mas01cr@498 294 if(datum->dim != adb->header->dim) {
mas01cr@498 295 return 1;
mas01cr@239 296 }
mas01cr@498 297 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@498 298 d = *datum;
mas01cr@498 299 datum = &d;
mas01cr@498 300 } else if (datum->key) {
mas01cr@498 301 uint32_t track_id;
mas01cr@498 302 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@498 303 return 1;
mas01cr@498 304 }
mas01cr@498 305 audiodb_track_id_datum(adb, track_id, &d);
mas01cr@498 306 } else {
mas01cr@498 307 return 1;
mas01cr@239 308 }
mas01cr@239 309
mas01cr@498 310 /* FIXME: check the overflow logic here */
mas01cr@498 311 if(sequence_start + sequence_length > d.nvectors) {
mas01cr@498 312 if(datum != &d) {
mas01cr@498 313 audiodb_free_datum(&d);
mas01cr@498 314 }
mas01cr@498 315 return 1;
mas01cr@498 316 }
mas01cr@239 317
mas01cr@498 318 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
mas01cr@498 319
mas01cr@498 320 /* Finally, if applicable, set up the moving qpointers. */
mas01cr@498 321 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 322 /* the qpointers are already at the start, and so correct. */
mas01cr@498 323 } else {
mas01cr@498 324 /* adjust the qpointers to point to the correct place in the sequence */
mas01cr@498 325 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@498 326 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@498 327 if(d.power) {
mas01cr@498 328 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@239 329 }
mas01cr@498 330 qpointers->nvectors = sequence_length;
mas01cr@239 331 }
mas01cr@498 332
mas01cr@498 333 /* Clean up: free any bits of datum that we have ourselves
mas01cr@498 334 * allocated. */
mas01cr@498 335 if(datum != &d) {
mas01cr@498 336 audiodb_free_datum(&d);
mas01cr@498 337 }
mas01cr@498 338
mas01cr@498 339 return 0;
mas01cr@239 340 }
mas01cr@239 341
mas01cr@498 342 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@498 343 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@498 344 uint32_t sequence_length = spec->qid.sequence_length;
mas01mc@292 345
mas01cr@498 346 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@498 347 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@498 348 double *times_table = NULL;
mas01cr@498 349
mas01cr@498 350
mas01cr@498 351 dbpointers->nvectors = nvectors;
mas01cr@498 352 dbpointers->l2norm_data = new double[nvectors];
mas01cr@498 353
mas01cr@498 354 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@498 355 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@498 356 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@498 357
mas01cr@498 358 if (using_power) {
mas01cr@509 359 if (!(adb->header->flags & ADB_HEADER_FLAG_POWER)) {
mas01cr@498 360 goto error;
mas01cr@498 361 }
mas01cr@498 362 dbpointers->power_data = new double[nvectors];
mas01cr@498 363 sppp = dbpointers->power_data;
mas01cr@498 364 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@498 365 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01mc@292 366 }
mas01mc@292 367
mas01cr@498 368 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@498 369 size_t track_length = (*adb->track_lengths)[i];
mas01cr@498 370 if(track_length >= sequence_length) {
mas01cr@498 371 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@498 372 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@498 373 if (using_power) {
mas01cr@498 374 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@498 375 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@498 376 }
mas01mc@324 377 }
mas01cr@498 378 snpp += track_length;
mas01cr@498 379 if (using_power) {
mas01cr@498 380 sppp += track_length;
mas01mc@324 381 }
mas01mc@292 382 }
mas01mc@292 383
mas01cr@498 384 if (using_times) {
mas01cr@509 385 if(!(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 386 goto error;
mas01cr@498 387 }
mas01mc@292 388
mas01cr@498 389 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@498 390
mas01cr@498 391 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@498 392 if(!times_table) {
mas01cr@498 393 goto error;
mas01cr@498 394 }
mas01cr@498 395 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@498 396 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@498 397 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 398 size_t track_length = (*adb->track_lengths)[k];
mas01cr@498 399 unsigned int j;
mas01cr@498 400 dbpointers->mean_duration[k] = 0.0;
mas01cr@498 401 for(j = 0; j < track_length; j++) {
mas01cr@498 402 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01mc@292 403 }
mas01cr@498 404 dbpointers->mean_duration[k] /= j;
mas01mc@292 405 }
mas01cr@498 406
mas01cr@498 407 free(times_table);
mas01cr@498 408 times_table = NULL;
mas01mc@292 409 }
mas01cr@498 410
mas01cr@498 411 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@498 412 dbpointers->power = dbpointers->power_data;
mas01cr@498 413 return 0;
mas01cr@498 414
mas01cr@498 415 error:
mas01cr@498 416 if(dbpointers->l2norm_data) {
mas01cr@498 417 delete [] dbpointers->l2norm_data;
mas01cr@498 418 }
mas01cr@498 419 if(dbpointers->power_data) {
mas01cr@498 420 delete [] dbpointers->power_data;
mas01cr@498 421 }
mas01cr@498 422 if(dbpointers->mean_duration) {
mas01cr@498 423 delete [] dbpointers->mean_duration;
mas01cr@498 424 }
mas01cr@498 425 if(times_table) {
mas01cr@498 426 free(times_table);
mas01cr@498 427 }
mas01cr@498 428 return 1;
mas01cr@498 429
mas01mc@292 430 }
mas01mc@292 431
mas01cr@498 432 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@498 433 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 434
mas01cr@498 435 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@498 436 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 437
mas01cr@498 438 if(qstate->exact_evaluation_queue->size() == 0) {
mas01cr@498 439 return 0;
mas01cr@239 440 }
mas01cr@239 441
mas01cr@498 442 /* We are guaranteed that the order of points is sorted by:
mas01cr@498 443 * {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@498 444 * initialization of track data. We assume that points usually
mas01cr@498 445 * don't overlap, so we will use exhaustive dot product evaluation
mas01cr@498 446 * (instead of memoization of partial sums, as in query_loop()).
mas01cr@498 447 */
mas01cr@498 448 double dist;
mas01cr@498 449 double *dbdata = 0, *dbdata_pointer;
mas01cr@498 450 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
mas01cr@498 451 Uns32T npairs = qstate->exact_evaluation_queue->size();
mas01cr@498 452 while(npairs--) {
mas01cr@498 453 PointPair pp = qstate->exact_evaluation_queue->top();
mas01cr@498 454 if(currentTrack != pp.trackID) {
mas01cr@509 455 maybe_delete_array(dbdata);
mas01cr@509 456 maybe_delete_array(dbpointers.l2norm_data);
mas01cr@509 457 maybe_delete_array(dbpointers.power_data);
mas01cr@509 458 maybe_delete_array(dbpointers.mean_duration);
mas01cr@498 459 currentTrack = pp.trackID;
mas01cr@498 460 adb_datum_t d = {0};
mas01cr@498 461 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
mas01cr@498 462 delete qstate->exact_evaluation_queue;
mas01cr@498 463 return 1;
mas01cr@498 464 }
mas01cr@498 465 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
mas01cr@498 466 delete qstate->exact_evaluation_queue;
mas01cr@498 467 audiodb_free_datum(&d);
mas01cr@498 468 return 1;
mas01cr@498 469 }
mas01cr@498 470 audiodb_free_datum(&d);
mas01cr@498 471 }
mas01cr@498 472 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
mas01cr@498 473 Uns32T sPos = pp.spos; // index into l2norm table
mas01cr@498 474 // Test power thresholds before computing distance
mas01cr@498 475 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@498 476 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01cr@498 477 // Compute distance
mas01cr@498 478 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
mas01cr@498 479 double qn = qpointers->l2norm[qPos];
mas01cr@498 480 double sn = dbpointers.l2norm[sPos];
mas01cr@498 481 switch(spec->params.distance) {
mas01cr@498 482 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 483 dist = 2 - (2/(qn*sn))*dist;
mas01cr@498 484 break;
mas01cr@498 485 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 486 dist = qn*qn + sn*sn - 2*dist;
mas01cr@498 487 break;
mas01cr@498 488 }
mas01cr@498 489 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@509 490 dist <= (spec->refine.radius + ADB_DISTANCE_TOLERANCE)) {
mas01cr@498 491 adb_result_t r;
mas01cr@498 492 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@498 493 r.dist = dist;
mas01cr@498 494 r.qpos = pp.qpos;
mas01cr@498 495 r.ipos = pp.spos;
mas01cr@498 496 qstate->accumulator->add_point(&r);
mas01cr@239 497 }
mas01cr@239 498 }
mas01cr@498 499 qstate->exact_evaluation_queue->pop();
mas01mc@292 500 }
mas01mc@474 501
mas01mc@315 502 // Cleanup
mas01cr@509 503 maybe_delete_array(dbdata);
mas01cr@509 504 maybe_delete_array(dbpointers.l2norm_data);
mas01cr@509 505 maybe_delete_array(dbpointers.power_data);
mas01cr@509 506 maybe_delete_array(dbpointers.mean_duration);
mas01cr@498 507 delete qstate->exact_evaluation_queue;
mas01cr@498 508 return 0;
mas01mc@292 509 }
mas01mc@292 510
mas01cr@498 511 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@498 512
mas01cr@498 513 double *query, *query_data;
mas01cr@498 514 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01mc@292 515
mas01cr@498 516 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 517
mas01cr@509 518 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 519 /* FIXME: actually it would be nice to support this mode of
mas01cr@498 520 * operation, but for now... */
mas01cr@498 521 return 1;
mas01cr@498 522 }
mas01mc@324 523
mas01cr@498 524 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@498 525 return 1;
mas01cr@498 526 }
mas01cr@239 527
mas01cr@498 528 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@498 529 return 1;
mas01cr@498 530 }
mas01cr@239 531
mas01cr@498 532 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@498 533 unsigned wL = spec->qid.sequence_length;
mas01cr@239 534 double **D = 0; // Differences query and target
mas01cr@239 535 double **DD = 0; // Matched filter distance
mas01cr@239 536
mas01cr@498 537 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@498 538 DD = new double*[qpointers.nvectors];
mas01cr@239 539
mas01cr@239 540 off_t trackIndexOffset;
mas01cr@239 541
mas01cr@239 542 // Track loop
mas01cr@239 543 size_t data_buffer_size = 0;
mas01cr@239 544 double *data_buffer = 0;
mas01cr@498 545 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 546
mas01cr@498 547 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@498 548 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@498 549 unsigned t = track;
mas01cr@498 550
mas01cr@498 551 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@498 552 track++;
mas01cr@498 553 if(track == adb->header->numFiles) {
mas01cr@498 554 goto loop_finish;
mas01cr@239 555 }
mas01cr@239 556 }
mas01cr@498 557 trackOffset = (*adb->track_offsets)[track];
mas01cr@498 558 if(track != t) {
mas01cr@498 559 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01cr@498 560 }
mas01cr@498 561 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 562
mas01cr@498 563 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@498 564 return 1;
mas01mc@292 565 }
mas01cr@498 566 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@498 567
mas01cr@498 568 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01mc@292 569
mas01cr@498 570 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@498 571 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 572
mas01cr@239 573 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@498 574 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@498 575 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@498 576 double thisDist = 0;
mas01cr@498 577 double qn = qpointers.l2norm[j];
mas01cr@498 578 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@498 579 switch(spec->params.distance) {
mas01cr@498 580 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 581 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@498 582 break;
mas01cr@498 583 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 584 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@498 585 break;
mas01cr@498 586 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 587 thisDist = DD[j][k];
mas01cr@498 588 break;
mas01cr@498 589 }
mas01cr@239 590 // Power test
mas01cr@498 591 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 592 // radius test
mas01cr@498 593 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@509 594 thisDist <= (spec->refine.radius + ADB_DISTANCE_TOLERANCE)) {
mas01cr@498 595 adb_result_t r;
mas01cr@498 596 r.key = (*adb->keys)[track].c_str();
mas01cr@498 597 r.dist = thisDist;
mas01cr@498 598 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 599 r.qpos = j;
mas01cr@498 600 } else {
mas01cr@498 601 r.qpos = spec->qid.sequence_start;
mas01cr@498 602 }
mas01cr@498 603 r.ipos = k;
mas01cr@498 604 qstate->accumulator->add_point(&r);
mas01cr@239 605 }
mas01cr@239 606 }
mas01cr@239 607 }
mas01cr@239 608 }
mas01cr@239 609 } // Duration match
mas01cr@498 610 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 611 }
mas01cr@239 612 }
mas01cr@239 613
mas01cr@498 614 loop_finish:
mas01cr@498 615
mas01cr@239 616 free(data_buffer);
mas01cr@239 617
mas01cr@239 618 // Clean up
mas01cr@239 619 if(query_data)
mas01cr@239 620 delete[] query_data;
mas01cr@498 621 if(qpointers.l2norm_data)
mas01cr@498 622 delete[] qpointers.l2norm_data;
mas01cr@498 623 if(qpointers.power_data)
mas01cr@498 624 delete[] qpointers.power_data;
mas01cr@498 625 if(qpointers.mean_duration)
mas01cr@498 626 delete[] qpointers.mean_duration;
mas01cr@498 627 if(dbpointers.power_data)
mas01cr@498 628 delete[] dbpointers.power_data;
mas01cr@498 629 if(dbpointers.l2norm_data)
mas01cr@498 630 delete[] dbpointers.l2norm_data;
mas01cr@239 631 if(D)
mas01cr@239 632 delete[] D;
mas01cr@239 633 if(DD)
mas01cr@239 634 delete[] DD;
mas01cr@498 635 if(dbpointers.mean_duration)
mas01cr@498 636 delete[] dbpointers.mean_duration;
mas01cr@498 637
mas01cr@498 638 return 0;
mas01cr@239 639 }